Diffstat (limited to 'contrib/llvm/lib/CodeGen')
172 files changed, 128993 insertions, 0 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp new file mode 100644 index 0000000..c50f8b5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -0,0 +1,943 @@ +//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AggressiveAntiDepBreaker class, which +// implements register anti-dependence breaking during post-RA +// scheduling. It attempts to break all anti-dependencies within a +// block. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "post-RA-sched" +#include "AggressiveAntiDepBreaker.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod +static cl::opt<int> +DebugDiv("agg-antidep-debugdiv", + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); +static cl::opt<int> +DebugMod("agg-antidep-debugmod", + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); + +AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, + MachineBasicBlock *BB) : + NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0), + GroupNodeIndices(TargetRegs, 0), + KillIndices(TargetRegs, 0), + DefIndices(TargetRegs, 0) +{ + const unsigned BBSize = BB->size(); + for (unsigned i = 0; i < NumTargetRegs; ++i) { + // Initialize all registers to be in their own group. Initially we + // assign the register to the same-indexed GroupNode. + GroupNodeIndices[i] = i; + // Initialize the indices to indicate that no registers are live. + KillIndices[i] = ~0u; + DefIndices[i] = BBSize; + } +} + +unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) { + unsigned Node = GroupNodeIndices[Reg]; + while (GroupNodes[Node] != Node) + Node = GroupNodes[Node]; + + return Node; +} + +void AggressiveAntiDepState::GetGroupRegs( + unsigned Group, + std::vector<unsigned> &Regs, + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs) +{ + for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) { + if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0)) + Regs.push_back(Reg); + } +} + +unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) +{ + assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!"); + assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!"); + + // find group for each register + unsigned Group1 = GetGroup(Reg1); + unsigned Group2 = GetGroup(Reg2); + + // if either group is 0, then that must become the parent + unsigned Parent = (Group1 == 0) ? Group1 : Group2; + unsigned Other = (Parent == Group1) ? 
Group2 : Group1; + GroupNodes.at(Other) = Parent; + return Parent; +} + +unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) +{ + // Create a new GroupNode for Reg. Reg's existing GroupNode must + // stay as is because there could be other GroupNodes referring to + // it. + unsigned idx = GroupNodes.size(); + GroupNodes.push_back(idx); + GroupNodeIndices[Reg] = idx; + return idx; +} + +bool AggressiveAntiDepState::IsLive(unsigned Reg) +{ + // KillIndex must be defined and DefIndex not defined for a register + // to be live. + return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u)); +} + + + +AggressiveAntiDepBreaker:: +AggressiveAntiDepBreaker(MachineFunction& MFi, + const RegisterClassInfo &RCI, + TargetSubtargetInfo::RegClassVector& CriticalPathRCs) : + AntiDepBreaker(), MF(MFi), + MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), + TRI(MF.getTarget().getRegisterInfo()), + RegClassInfo(RCI), + State(NULL) { + /* Collect a bitset of all registers that are only broken if they + are on the critical path. */ + for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { + BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]); + if (CriticalPathSet.none()) + CriticalPathSet = CPSet; + else + CriticalPathSet |= CPSet; + } + + DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); + DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; + r = CriticalPathSet.find_next(r)) + dbgs() << " " << TRI->getName(r)); + DEBUG(dbgs() << '\n'); +} + +AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { + delete State; +} + +void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { + assert(State == NULL); + State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); + + bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + } + } + + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. 
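+  // Note: getPristineRegs() returns the callee-saved registers that have
+  // not been saved at this point and so still hold the caller's values;
+  // in a non-return block only those need to be treated as live-out here.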
+ const MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector Pristine = MFI->getPristineRegs(BB); + for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + unsigned Reg = *I; + if (!IsReturnBlock && !Pristine.test(Reg)) continue; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; + State->UnionGroups(AliasReg, 0); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } +} + +void AggressiveAntiDepBreaker::FinishBlock() { + delete State; + State = NULL; +} + +void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) { + assert(Count < InsertPosIndex && "Instruction index out of expected range!"); + + std::set<unsigned> PassthruRegs; + GetPassthruRegs(MI, PassthruRegs); + PrescanInstruction(MI, Count, PassthruRegs); + ScanInstruction(MI, Count); + + DEBUG(dbgs() << "Observe: "); + DEBUG(MI->dump()); + DEBUG(dbgs() << "\tRegs:"); + + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { + // If Reg is current live, then mark that it can't be renamed as + // we don't know the extent of its live-range anymore (now that it + // has been scheduled). If it is not live but was defined in the + // previous schedule region, then set its def index to the most + // conservative location (i.e. the beginning of the previous + // schedule region). + if (State->IsLive(Reg)) { + DEBUG(if (State->GetGroup(Reg) != 0) + dbgs() << " " << TRI->getName(Reg) << "=g" << + State->GetGroup(Reg) << "->g0(region live-out)"); + State->UnionGroups(Reg, 0); + } else if ((DefIndices[Reg] < InsertPosIndex) + && (DefIndices[Reg] >= Count)) { + DefIndices[Reg] = Count; + } + } + DEBUG(dbgs() << '\n'); +} + +bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, + MachineOperand& MO) +{ + if (!MO.isReg() || !MO.isImplicit()) + return false; + + unsigned Reg = MO.getReg(); + if (Reg == 0) + return false; + + MachineOperand *Op = NULL; + if (MO.isDef()) + Op = MI->findRegisterUseOperand(Reg, true); + else + Op = MI->findRegisterDefOperand(Reg); + + return((Op != NULL) && Op->isImplicit()); +} + +void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, + std::set<unsigned>& PassthruRegs) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) || + IsImplicitDefUse(MI, MO)) { + const unsigned Reg = MO.getReg(); + PassthruRegs.insert(Reg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + PassthruRegs.insert(*SubRegs); + } + } +} + +/// AntiDepEdges - Return in Edges the anti- and output- dependencies +/// in SU that we want to consider for breaking. +static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) { + SmallSet<unsigned, 4> RegSet; + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { + unsigned Reg = P->getReg(); + if (RegSet.count(Reg) == 0) { + Edges.push_back(&*P); + RegSet.insert(Reg); + } + } + } +} + +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static const SUnit *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. 
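+  // PredSU->getDepth() + PredLatency is the total latency of the path
+  // through that predecessor, so taking the maximum (preferring anti-dep
+  // edges on ties, as below) steps one instruction down the critical path.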
+ if (SU != 0) { + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + const SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + } + + return (Next) ? Next->getSUnit() : 0; +} + +void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, + const char *tag, + const char *header, + const char *footer) { + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + if (!State->IsLive(Reg)) { + KillIndices[Reg] = KillIdx; + DefIndices[Reg] = ~0u; + RegRefs.erase(Reg); + State->LeaveGroup(Reg); + DEBUG(if (header != NULL) { + dbgs() << header << TRI->getName(Reg); header = NULL; }); + DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); + } + // Repeat for subregisters. + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubregReg = *SubRegs; + if (!State->IsLive(SubregReg)) { + KillIndices[SubregReg] = KillIdx; + DefIndices[SubregReg] = ~0u; + RegRefs.erase(SubregReg); + State->LeaveGroup(SubregReg); + DEBUG(if (header != NULL) { + dbgs() << header << TRI->getName(Reg); header = NULL; }); + DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" << + State->GetGroup(SubregReg) << tag); + } + } + + DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer); +} + +void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, + unsigned Count, + std::set<unsigned>& PassthruRegs) { + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + // Handle dead defs by simulating a last-use of the register just + // after the def. A dead def can occur because the def is truly + // dead, or because only a subregister is live at the def. If we + // don't do this the dead def will be incorrectly merged into the + // previous def. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n"); + } + + DEBUG(dbgs() << "\tDef Groups:"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg)); + + // If MI's defs have a special allocation requirement, don't allow + // any def registers to be changed. Also assume all registers + // defined in a call must not be changed (ABI). + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || + TII->isPredicated(MI)) { + DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); + State->UnionGroups(Reg, 0); + } + + // Any aliased that are live at this point are completely or + // partially defined here, so group those aliases with Reg. 
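+    // E.g. on X86, defining %EAX while %AX is live must place %EAX and
+    // %AX in the same group: renaming one without the other is unsound.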
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; + if (State->IsLive(AliasReg)) { + State->UnionGroups(Reg, AliasReg); + DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << + TRI->getName(AliasReg) << ")"); + } + } + + // Note register reference... + const TargetRegisterClass *RC = NULL; + if (i < MI->getDesc().getNumOperands()) + RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; + RegRefs.insert(std::make_pair(Reg, RR)); + } + + DEBUG(dbgs() << '\n'); + + // Scan the register defs for this instruction and update + // live-ranges. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + // Ignore KILLs and passthru registers for liveness... + if (MI->isKill() || (PassthruRegs.count(Reg) != 0)) + continue; + + // Update def for Reg and aliases. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + DefIndices[*AI] = Count; + } +} + +void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, + unsigned Count) { + DEBUG(dbgs() << "\tUse Groups:"); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + // If MI's uses have special allocation requirement, don't allow + // any use registers to be changed. Also assume all registers + // used in a call must not be changed (ABI). + // FIXME: The issue with predicated instruction is more complex. We are being + // conservatively here because the kill markers cannot be trusted after + // if-conversion: + // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... + // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || + TII->isPredicated(MI); + + // Scan the register uses for this instruction and update + // live-ranges, groups and RegRefs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << + State->GetGroup(Reg)); + + // It wasn't previously live but now it is, this is a kill. Forget + // the previous live-range information and start a new live-range + // for the register. + HandleLastUse(Reg, Count, "(last-use)"); + + if (Special) { + DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); + State->UnionGroups(Reg, 0); + } + + // Note register reference... + const TargetRegisterClass *RC = NULL; + if (i < MI->getDesc().getNumOperands()) + RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; + RegRefs.insert(std::make_pair(Reg, RR)); + } + + DEBUG(dbgs() << '\n'); + + // Form a group of all defs and uses of a KILL instruction to ensure + // that all registers are renamed as a group. 
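+  // A KILL is only a liveness marker with no machine semantics, so if any
+  // of its operands is renamed, all of them must be renamed consistently.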
+ if (MI->isKill()) { + DEBUG(dbgs() << "\tKill Group:"); + + unsigned FirstReg = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (FirstReg != 0) { + DEBUG(dbgs() << "=" << TRI->getName(Reg)); + State->UnionGroups(FirstReg, Reg); + } else { + DEBUG(dbgs() << " " << TRI->getName(Reg)); + FirstReg = Reg; + } + } + + DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); + } +} + +BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { + BitVector BV(TRI->getNumRegs(), false); + bool first = true; + + // Check all references that need rewriting for Reg. For each, use + // the corresponding register class to narrow the set of registers + // that are appropriate for renaming. + std::pair<std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator, + std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator> + Range = State->GetRegRefs().equal_range(Reg); + for (std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first, + QE = Range.second; Q != QE; ++Q) { + const TargetRegisterClass *RC = Q->second.RC; + if (RC == NULL) continue; + + BitVector RCBV = TRI->getAllocatableSet(MF, RC); + if (first) { + BV |= RCBV; + first = false; + } else { + BV &= RCBV; + } + + DEBUG(dbgs() << " " << RC->getName()); + } + + return BV; +} + +bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( + unsigned AntiDepGroupIndex, + RenameOrderType& RenameOrder, + std::map<unsigned, unsigned> &RenameMap) { + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + // Collect all referenced registers in the same group as + // AntiDepReg. These all need to be renamed together if we are to + // break the anti-dependence. + std::vector<unsigned> Regs; + State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs); + assert(Regs.size() > 0 && "Empty register group!"); + if (Regs.size() == 0) + return false; + + // Find the "superest" register in the group. At the same time, + // collect the BitVector of registers that can be used to rename + // each register. + DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex + << ":\n"); + std::map<unsigned, BitVector> RenameRegisterMap; + unsigned SuperReg = 0; + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg)) + SuperReg = Reg; + + // If Reg has any references, then collect possible rename regs + if (RegRefs.count(Reg) > 0) { + DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":"); + + BitVector BV = GetRenameRegisters(Reg); + RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV)); + + DEBUG(dbgs() << " ::"); + DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r)) + dbgs() << " " << TRI->getName(r)); + DEBUG(dbgs() << "\n"); + } + } + + // All group registers should be a subreg of SuperReg. 
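+  // E.g. a group of {EAX, AX, AH} on X86 has SuperReg == EAX, with AX and
+  // AH being its subregisters; renaming is driven from the superregister.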
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + if (Reg == SuperReg) continue; + bool IsSub = TRI->isSubRegister(SuperReg, Reg); + assert(IsSub && "Expecting group subregister"); + if (!IsSub) + return false; + } + +#ifndef NDEBUG + // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod + if (DebugDiv > 0) { + static int renamecnt = 0; + if (renamecnt++ % DebugDiv != DebugMod) + return false; + + dbgs() << "*** Performing rename " << TRI->getName(SuperReg) << + " for debug ***\n"; + } +#endif + + // Check each possible rename register for SuperReg in round-robin + // order. If that register is available, and the corresponding + // registers are available for the other group subregisters, then we + // can use those registers to rename. + + // FIXME: Using getMinimalPhysRegClass is very conservative. We should + // check every use of the register and find the largest register class + // that can be used in all of them. + const TargetRegisterClass *SuperRC = + TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); + + ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC); + if (Order.empty()) { + DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); + return false; + } + + DEBUG(dbgs() << "\tFind Registers:"); + + if (RenameOrder.count(SuperRC) == 0) + RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size())); + + unsigned OrigR = RenameOrder[SuperRC]; + unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR); + unsigned R = OrigR; + do { + if (R == 0) R = Order.size(); + --R; + const unsigned NewSuperReg = Order[R]; + // Don't consider non-allocatable registers + if (!MRI.isAllocatable(NewSuperReg)) continue; + // Don't replace a register with itself. + if (NewSuperReg == SuperReg) continue; + + DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':'); + RenameMap.clear(); + + // For each referenced group register (which must be a SuperReg or + // a subregister of SuperReg), find the corresponding subregister + // of NewSuperReg and make sure it is free to be renamed. + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + unsigned NewReg = 0; + if (Reg == SuperReg) { + NewReg = NewSuperReg; + } else { + unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg); + if (NewSubRegIdx != 0) + NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx); + } + + DEBUG(dbgs() << " " << TRI->getName(NewReg)); + + // Check if Reg can be renamed to NewReg. + BitVector BV = RenameRegisterMap[Reg]; + if (!BV.test(NewReg)) { + DEBUG(dbgs() << "(no rename)"); + goto next_super_reg; + } + + // If NewReg is dead and NewReg's most recent def is not before + // Regs's kill, it's safe to replace Reg with NewReg. We + // must also check all aliases of NewReg, because we can't define a + // register when any sub or super is already live. + if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) { + DEBUG(dbgs() << "(live)"); + goto next_super_reg; + } else { + bool found = false; + for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; + if (State->IsLive(AliasReg) || + (KillIndices[Reg] > DefIndices[AliasReg])) { + DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)"); + found = true; + break; + } + } + if (found) + goto next_super_reg; + } + + // Record that 'Reg' can be renamed to 'NewReg'. + RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg)); + } + + // If we fall-out here, then every register in the group can be + // renamed, as recorded in RenameMap. 
+ RenameOrder.erase(SuperRC); + RenameOrder.insert(RenameOrderType::value_type(SuperRC, R)); + DEBUG(dbgs() << "]\n"); + return true; + + next_super_reg: + DEBUG(dbgs() << ']'); + } while (R != EndR); + + DEBUG(dbgs() << '\n'); + + // No registers are free and available! + return false; +} + +/// BreakAntiDependencies - Identifiy anti-dependencies within the +/// ScheduleDAG and break them by renaming registers. +/// +unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( + const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues) { + + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + // The code below assumes that there is at least one instruction, + // so just duck out immediately if the block is empty. + if (SUnits.empty()) return 0; + + // For each regclass the next register to use for renaming. + RenameOrderType RenameOrder; + + // ...need a map from MI to SUnit. + std::map<MachineInstr *, const SUnit *> MISUnitMap; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + const SUnit *SU = &SUnits[i]; + MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(), + SU)); + } + + // Track progress along the critical path through the SUnit graph as + // we walk the instructions. This is needed for regclasses that only + // break critical-path anti-dependencies. + const SUnit *CriticalPathSU = 0; + MachineInstr *CriticalPathMI = 0; + if (CriticalPathSet.any()) { + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + const SUnit *SU = &SUnits[i]; + if (!CriticalPathSU || + ((SU->getDepth() + SU->Latency) > + (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { + CriticalPathSU = SU; + } + } + + CriticalPathMI = CriticalPathSU->getInstr(); + } + +#ifndef NDEBUG + DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n"); + DEBUG(dbgs() << "Available regs:"); + for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { + if (!State->IsLive(Reg)) + DEBUG(dbgs() << " " << TRI->getName(Reg)); + } + DEBUG(dbgs() << '\n'); +#endif + + // Attempt to break anti-dependence edges. Walk the instructions + // from the bottom up, tracking information about liveness as we go + // to help determine which registers are available. + unsigned Broken = 0; + unsigned Count = InsertPosIndex - 1; + for (MachineBasicBlock::iterator I = End, E = Begin; + I != E; --Count) { + MachineInstr *MI = --I; + + if (MI->isDebugValue()) + continue; + + DEBUG(dbgs() << "Anti: "); + DEBUG(MI->dump()); + + std::set<unsigned> PassthruRegs; + GetPassthruRegs(MI, PassthruRegs); + + // Process the defs in MI... + PrescanInstruction(MI, Count, PassthruRegs); + + // The dependence edges that represent anti- and output- + // dependencies that are candidates for breaking. + std::vector<const SDep *> Edges; + const SUnit *PathSU = MISUnitMap[MI]; + AntiDepEdges(PathSU, Edges); + + // If MI is not on the critical path, then we don't rename + // registers in the CriticalPathSet. + BitVector *ExcludeRegs = NULL; + if (MI == CriticalPathMI) { + CriticalPathSU = CriticalPathStep(CriticalPathSU); + CriticalPathMI = (CriticalPathSU) ? 
CriticalPathSU->getInstr() : 0; + } else { + ExcludeRegs = &CriticalPathSet; + } + + // Ignore KILL instructions (they form a group in ScanInstruction + // but don't cause any anti-dependence breaking themselves) + if (!MI->isKill()) { + // Attempt to break each anti-dependency... + for (unsigned i = 0, e = Edges.size(); i != e; ++i) { + const SDep *Edge = Edges[i]; + SUnit *NextSU = Edge->getSUnit(); + + if ((Edge->getKind() != SDep::Anti) && + (Edge->getKind() != SDep::Output)) continue; + + unsigned AntiDepReg = Edge->getReg(); + DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); + assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); + + if (!MRI.isAllocatable(AntiDepReg)) { + // Don't break anti-dependencies on non-allocatable registers. + DEBUG(dbgs() << " (non-allocatable)\n"); + continue; + } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) { + // Don't break anti-dependencies for critical path registers + // if not on the critical path + DEBUG(dbgs() << " (not critical-path)\n"); + continue; + } else if (PassthruRegs.count(AntiDepReg) != 0) { + // If the anti-dep register liveness "passes-thru", then + // don't try to change it. It will be changed along with + // the use if required to break an earlier antidep. + DEBUG(dbgs() << " (passthru)\n"); + continue; + } else { + // No anti-dep breaking for implicit deps + MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg); + assert(AntiDepOp != NULL && + "Can't find index for defined register operand"); + if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) { + DEBUG(dbgs() << " (implicit)\n"); + continue; + } + + // If the SUnit has other dependencies on the SUnit that + // it anti-depends on, don't bother breaking the + // anti-dependency since those edges would prevent such + // units from being scheduled past each other + // regardless. + // + // Also, if there are dependencies on other SUnits with the + // same register as the anti-dependency, don't attempt to + // break it. + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), + PE = PathSU->Preds.end(); P != PE; ++P) { + if (P->getSUnit() == NextSU ? + (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : + (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { + AntiDepReg = 0; + break; + } + } + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), + PE = PathSU->Preds.end(); P != PE; ++P) { + if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) && + (P->getKind() != SDep::Output)) { + DEBUG(dbgs() << " (real dependency)\n"); + AntiDepReg = 0; + break; + } else if ((P->getSUnit() != NextSU) && + (P->getKind() == SDep::Data) && + (P->getReg() == AntiDepReg)) { + DEBUG(dbgs() << " (other dependency)\n"); + AntiDepReg = 0; + break; + } + } + + if (AntiDepReg == 0) continue; + } + + assert(AntiDepReg != 0); + if (AntiDepReg == 0) continue; + + // Determine AntiDepReg's register group. + const unsigned GroupIndex = State->GetGroup(AntiDepReg); + if (GroupIndex == 0) { + DEBUG(dbgs() << " (zero group)\n"); + continue; + } + + DEBUG(dbgs() << '\n'); + + // Look for a suitable register to use to break the anti-dependence. + std::map<unsigned, unsigned> RenameMap; + if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) { + DEBUG(dbgs() << "\tBreaking anti-dependence edge on " + << TRI->getName(AntiDepReg) << ":"); + + // Handle each group register... 
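+          // Each (CurrReg -> NewReg) entry is applied by rewriting every
+          // recorded reference to CurrReg and then transferring CurrReg's
+          // liveness state (kill/def indices) to NewReg.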
+ for (std::map<unsigned, unsigned>::iterator + S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) { + unsigned CurrReg = S->first; + unsigned NewReg = S->second; + + DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" << + TRI->getName(NewReg) << "(" << + RegRefs.count(CurrReg) << " refs)"); + + // Update the references to the old register CurrReg to + // refer to the new register NewReg. + std::pair<std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator, + std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator> + Range = RegRefs.equal_range(CurrReg); + for (std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator + Q = Range.first, QE = Range.second; Q != QE; ++Q) { + Q->second.Operand->setReg(NewReg); + // If the SU for the instruction being updated has debug + // information related to the anti-dependency register, make + // sure to update that as well. + const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()]; + if (!SU) continue; + for (DbgValueVector::iterator DVI = DbgValues.begin(), + DVE = DbgValues.end(); DVI != DVE; ++DVI) + if (DVI->second == Q->second.Operand->getParent()) + UpdateDbgValue(DVI->first, AntiDepReg, NewReg); + } + + // We just went back in time and modified history; the + // liveness information for CurrReg is now inconsistent. Set + // the state as if it were dead. + State->UnionGroups(NewReg, 0); + RegRefs.erase(NewReg); + DefIndices[NewReg] = DefIndices[CurrReg]; + KillIndices[NewReg] = KillIndices[CurrReg]; + + State->UnionGroups(CurrReg, 0); + RegRefs.erase(CurrReg); + DefIndices[CurrReg] = KillIndices[CurrReg]; + KillIndices[CurrReg] = ~0u; + assert(((KillIndices[CurrReg] == ~0u) != + (DefIndices[CurrReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + } + + ++Broken; + DEBUG(dbgs() << '\n'); + } + } + } + + ScanInstruction(MI, Count); + } + + return Broken; +} diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h new file mode 100644 index 0000000..6683630 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -0,0 +1,184 @@ +//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AggressiveAntiDepBreaker class, which +// implements register anti-dependence breaking during post-RA +// scheduling. It attempts to break all anti-dependencies within a +// block. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H +#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H + +#include "AntiDepBreaker.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <map> + +namespace llvm { +class RegisterClassInfo; + + /// Class AggressiveAntiDepState + /// Contains all the state necessary for anti-dep breaking. 
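+  /// Liveness is tracked per register with KillIndices/DefIndices, and
+  /// registers that must be renamed together are grouped with a
+  /// disjoint-union (union-find) structure over GroupNodes.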
+  class AggressiveAntiDepState {
+  public:
+    /// RegisterReference - Information about a register reference
+    /// within a live range.
+    typedef struct {
+      /// Operand - The register's operand
+      MachineOperand *Operand;
+      /// RC - The register class
+      const TargetRegisterClass *RC;
+    } RegisterReference;
+
+  private:
+    /// NumTargetRegs - Number of non-virtual target registers
+    /// (i.e. TRI->getNumRegs()).
+    const unsigned NumTargetRegs;
+
+    /// GroupNodes - Implements a disjoint-union data structure to
+    /// form register groups. A node is represented by an index into
+    /// the vector. A node can "point to" itself to indicate that it
+    /// is the parent of a group, or point to another node to indicate
+    /// that it is a member of the same group as that node.
+    std::vector<unsigned> GroupNodes;
+
+    /// GroupNodeIndices - For each register, the index of the GroupNode
+    /// currently representing the group that the register belongs to.
+    /// Register 0 is always represented by the 0 group, a group
+    /// composed of registers that are not eligible for anti-aliasing.
+    std::vector<unsigned> GroupNodeIndices;
+
+    /// RegRefs - Map registers to all their references within a live range.
+    std::multimap<unsigned, RegisterReference> RegRefs;
+
+    /// KillIndices - The index of the most recent kill (proceeding
+    /// bottom-up), or ~0u if the register is not live.
+    std::vector<unsigned> KillIndices;
+
+    /// DefIndices - The index of the most recent complete def (proceeding
+    /// bottom-up), or ~0u if the register is live.
+    std::vector<unsigned> DefIndices;
+
+  public:
+    AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
+
+    /// GetKillIndices - Return the kill indices.
+    std::vector<unsigned> &GetKillIndices() { return KillIndices; }
+
+    /// GetDefIndices - Return the define indices.
+    std::vector<unsigned> &GetDefIndices() { return DefIndices; }
+
+    /// GetRegRefs - Return the RegRefs map.
+    std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+    // GetGroup - Get the group for a register. The returned value is
+    // the index of the GroupNode representing the group.
+    unsigned GetGroup(unsigned Reg);
+
+    // GetGroupRegs - Return a vector of the registers belonging to a
+    // group. If RegRefs is non-NULL then only include referenced registers.
+    void GetGroupRegs(
+       unsigned Group,
+       std::vector<unsigned> &Regs,
+       std::multimap<unsigned,
+         AggressiveAntiDepState::RegisterReference> *RegRefs);
+
+    // UnionGroups - Union Reg1's and Reg2's groups to form a new
+    // group. Return the index of the GroupNode representing the
+    // group.
+    unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+    // LeaveGroup - Remove a register from its current group and place
+    // it alone in its own group. Return the index of the GroupNode
+    // representing the register's new group.
+    unsigned LeaveGroup(unsigned Reg);
+
+    /// IsLive - Return true if Reg is live.
+    bool IsLive(unsigned Reg);
+  };
+
+
+  /// Class AggressiveAntiDepBreaker
+  class AggressiveAntiDepBreaker : public AntiDepBreaker {
+    MachineFunction& MF;
+    MachineRegisterInfo &MRI;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    const RegisterClassInfo &RegClassInfo;
+
+    /// CriticalPathSet - The set of registers that should only be
+    /// renamed if they are on the critical path.
+    BitVector CriticalPathSet;
+
+    /// State - The state used to identify and rename anti-dependence
+    /// registers.
+    AggressiveAntiDepState *State;
+
+  public:
+    AggressiveAntiDepBreaker(MachineFunction& MFi,
+                             const RegisterClassInfo &RCI,
+                             TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
+    ~AggressiveAntiDepBreaker();
+
+    /// StartBlock - Initialize anti-dep breaking for a new basic block.
+    void StartBlock(MachineBasicBlock *BB);
+
+    /// BreakAntiDependencies - Identify anti-dependencies along the critical
+    /// path of the ScheduleDAG and break them by renaming registers.
+    ///
+    unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                                   MachineBasicBlock::iterator Begin,
+                                   MachineBasicBlock::iterator End,
+                                   unsigned InsertPosIndex,
+                                   DbgValueVector &DbgValues);
+
+    /// Observe - Update liveness information to account for the current
+    /// instruction, which will not be scheduled.
+    ///
+    void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+    /// FinishBlock - Finish anti-dep breaking for a basic block.
+    void FinishBlock();
+
+  private:
+    /// Keep track of a position in the allocation order for each regclass.
+    typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
+
+    /// IsImplicitDefUse - Return true if MO represents a register
+    /// that is both implicitly used and defined in MI.
+    bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
+
+    /// GetPassthruRegs - If MI implicitly def/uses a register, then
+    /// return that register and all subregisters.
+    void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
+
+    void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+                       const char *header =NULL, const char *footer =NULL);
+
+    void PrescanInstruction(MachineInstr *MI, unsigned Count,
+                            std::set<unsigned>& PassthruRegs);
+    void ScanInstruction(MachineInstr *MI, unsigned Count);
+    BitVector GetRenameRegisters(unsigned Reg);
+    bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+                                   RenameOrderType& RenameOrder,
+                                   std::map<unsigned, unsigned> &RenameMap);
+  };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 0000000..3fa1f8f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,52 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Compare VirtRegMap::getRegAllocPref().
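+// The constructor below seeds Order from the RegisterClassInfo allocation
+// order for VirtReg's class, then lets the target reorder or prepend
+// preferred registers via getRegAllocationHints().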
+AllocationOrder::AllocationOrder(unsigned VirtReg, + const VirtRegMap &VRM, + const RegisterClassInfo &RegClassInfo) + : Pos(0) { + const MachineFunction &MF = VRM.getMachineFunction(); + const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo(); + Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); + TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM); + rewind(); + + DEBUG({ + if (!Hints.empty()) { + dbgs() << "hints:"; + for (unsigned I = 0, E = Hints.size(); I != E; ++I) + dbgs() << ' ' << PrintReg(Hints[I], TRI); + dbgs() << '\n'; + } + }); +#ifndef NDEBUG + for (unsigned I = 0, E = Hints.size(); I != E; ++I) + assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() && + "Target hint is outside allocation order."); +#endif +} diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h new file mode 100644 index 0000000..aed461a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h @@ -0,0 +1,85 @@ +//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an allocation order for virtual registers. +// +// The preferred allocation order for a virtual register depends on allocation +// hints and target hooks. The AllocationOrder class encapsulates all of that. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H +#define LLVM_CODEGEN_ALLOCATIONORDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCRegisterInfo.h" + +namespace llvm { + +class RegisterClassInfo; +class VirtRegMap; + +class AllocationOrder { + SmallVector<MCPhysReg, 16> Hints; + ArrayRef<MCPhysReg> Order; + int Pos; + +public: + /// Create a new AllocationOrder for VirtReg. + /// @param VirtReg Virtual register to allocate for. + /// @param VRM Virtual register map for function. + /// @param RegClassInfo Information about reserved and allocatable registers. + AllocationOrder(unsigned VirtReg, + const VirtRegMap &VRM, + const RegisterClassInfo &RegClassInfo); + + /// Get the allocation order without reordered hints. + ArrayRef<MCPhysReg> getOrder() const { return Order; } + + /// Return the next physical register in the allocation order, or 0. + /// It is safe to call next() again after it returned 0, it will keep + /// returning 0 until rewind() is called. + unsigned next() { + if (Pos < 0) + return Hints.end()[Pos++]; + while (Pos < int(Order.size())) { + unsigned Reg = Order[Pos++]; + if (!isHint(Reg)) + return Reg; + } + return 0; + } + + /// As next(), but allow duplicates to be returned, and stop before the + /// Limit'th register in the RegisterClassInfo allocation order. + /// + /// This can produce more than Limit registers if there are hints. + unsigned nextWithDups(unsigned Limit) { + if (Pos < 0) + return Hints.end()[Pos++]; + if (Pos < int(Limit)) + return Order[Pos++]; + return 0; + } + + /// Start over from the beginning. + void rewind() { Pos = -int(Hints.size()); } + + /// Return true if the last register returned from next() was a preferred register. + bool isHint() const { return Pos <= 0; } + + /// Return true if PhysReg is a preferred register. 
+ bool isHint(unsigned PhysReg) const { + return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end(); + } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp new file mode 100644 index 0000000..dd7282c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -0,0 +1,349 @@ +//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several CodeGen-specific LLVM IR analysis utilties. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Analysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence +/// of insertvalue or extractvalue indices that identify a member, return +/// the linearized index of the start of the member. +/// +unsigned llvm::ComputeLinearIndex(Type *Ty, + const unsigned *Indices, + const unsigned *IndicesEnd, + unsigned CurIndex) { + // Base case: We're done. + if (Indices && Indices == IndicesEnd) + return CurIndex; + + // Given a struct type, recursively traverse the elements. + if (StructType *STy = dyn_cast<StructType>(Ty)) { + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) { + if (Indices && *Indices == unsigned(EI - EB)) + return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex); + } + return CurIndex; + } + // Given an array type, recursively traverse the elements. + else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Type *EltTy = ATy->getElementType(); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + if (Indices && *Indices == i) + return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex); + } + return CurIndex; + } + // We haven't found the type we're looking for, so keep searching. + return CurIndex + 1; +} + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// EVTs that represent all the individual underlying +/// non-aggregate types that comprise it. +/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. +/// +void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, + SmallVectorImpl<EVT> &ValueVTs, + SmallVectorImpl<uint64_t> *Offsets, + uint64_t StartingOffset) { + // Given a struct type, recursively traverse the elements. 
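+  // For example, a struct {i32, [2 x float]} produces value types
+  // i32, f32, f32 with offsets 0, 4 and 8 under a typical 4-byte-aligned
+  // layout.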
+ if (StructType *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy); + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) + ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + StartingOffset + SL->getElementOffset(EI - EB)); + return; + } + // Given an array type, recursively traverse the elements. + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Type *EltTy = ATy->getElementType(); + uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty->isVoidTy()) + return; + // Base case: we can get an EVT for this LLVM IR type. + ValueVTs.push_back(TLI.getValueType(Ty)); + if (Offsets) + Offsets->push_back(StartingOffset); +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +GlobalVariable *llvm::ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalVariable *GV = dyn_cast<GlobalVariable>(V); + + if (GV && GV->getName() == "llvm.eh.catch.all.value") { + assert(GV->hasInitializer() && + "The EH catch-all value must have an initializer"); + Value *Init = GV->getInitializer(); + GV = dyn_cast<GlobalVariable>(Init); + if (!GV) V = cast<ConstantPointerNull>(Init); + } + + assert((GV || isa<ConstantPointerNull>(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being +/// processed uses a memory 'm' constraint. +bool +llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, + const TargetLowering &TLI) { + for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { + InlineAsm::ConstraintInfo &CI = CInfos[i]; + for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); + if (CType == TargetLowering::C_Memory) + return true; + } + + // Indirect operand accesses access memory. + if (CI.isIndirect) + return true; + } + + return false; +} + +/// getFCmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR floating-point condition code. This includes +/// consideration of global floating-point math flags. 
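+/// For example, FCMP_OEQ maps to ISD::SETOEQ and FCMP_UNO to ISD::SETUO.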
+/// +ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { + switch (Pred) { + case FCmpInst::FCMP_FALSE: return ISD::SETFALSE; + case FCmpInst::FCMP_OEQ: return ISD::SETOEQ; + case FCmpInst::FCMP_OGT: return ISD::SETOGT; + case FCmpInst::FCMP_OGE: return ISD::SETOGE; + case FCmpInst::FCMP_OLT: return ISD::SETOLT; + case FCmpInst::FCMP_OLE: return ISD::SETOLE; + case FCmpInst::FCMP_ONE: return ISD::SETONE; + case FCmpInst::FCMP_ORD: return ISD::SETO; + case FCmpInst::FCMP_UNO: return ISD::SETUO; + case FCmpInst::FCMP_UEQ: return ISD::SETUEQ; + case FCmpInst::FCMP_UGT: return ISD::SETUGT; + case FCmpInst::FCMP_UGE: return ISD::SETUGE; + case FCmpInst::FCMP_ULT: return ISD::SETULT; + case FCmpInst::FCMP_ULE: return ISD::SETULE; + case FCmpInst::FCMP_UNE: return ISD::SETUNE; + case FCmpInst::FCMP_TRUE: return ISD::SETTRUE; + default: llvm_unreachable("Invalid FCmp predicate opcode!"); + } +} + +ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) { + switch (CC) { + case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ; + case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE; + case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT; + case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE; + case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT; + case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE; + default: return CC; + } +} + +/// getICmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR integer condition code. +/// +ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { + switch (Pred) { + case ICmpInst::ICMP_EQ: return ISD::SETEQ; + case ICmpInst::ICMP_NE: return ISD::SETNE; + case ICmpInst::ICMP_SLE: return ISD::SETLE; + case ICmpInst::ICMP_ULE: return ISD::SETULE; + case ICmpInst::ICMP_SGE: return ISD::SETGE; + case ICmpInst::ICMP_UGE: return ISD::SETUGE; + case ICmpInst::ICMP_SLT: return ISD::SETLT; + case ICmpInst::ICMP_ULT: return ISD::SETULT; + case ICmpInst::ICMP_SGT: return ISD::SETGT; + case ICmpInst::ICMP_UGT: return ISD::SETUGT; + default: + llvm_unreachable("Invalid ICmp predicate opcode!"); + } +} + + +/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look +/// through it (and any transitive noop operands to it) and return its input +/// value. This is used to determine if a tail call can be formed. +/// +static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) { + // If V is not an instruction, it can't be looked through. + const Instruction *I = dyn_cast<Instruction>(V); + if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V; + + Value *Op = I->getOperand(0); + + // Look through truly no-op truncates. + if (isa<TruncInst>(I) && + TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType())) + return getNoopInput(I->getOperand(0), TLI); + + // Look through truly no-op bitcasts. + if (isa<BitCastInst>(I)) { + // No type change at all. + if (Op->getType() == I->getType()) + return getNoopInput(Op, TLI); + + // Pointer to pointer cast. + if (Op->getType()->isPointerTy() && I->getType()->isPointerTy()) + return getNoopInput(Op, TLI); + + if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) && + TLI.isTypeLegal(EVT::getEVT(Op->getType())) && + TLI.isTypeLegal(EVT::getEVT(I->getType()))) + return getNoopInput(Op, TLI); + } + + // Look through inttoptr. + if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) { + // Make sure this isn't a truncating or extending cast. We could support + // this eventually, but don't bother for now. 
+    if (TLI.getPointerTy().getSizeInBits() ==
+        cast<IntegerType>(Op->getType())->getBitWidth())
+      return getNoopInput(Op, TLI);
+  }
+
+  // Look through ptrtoint.
+  if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) {
+    // Make sure this isn't a truncating or extending cast. We could support
+    // this eventually, but don't bother for now.
+    if (TLI.getPointerTy().getSizeInBits() ==
+        cast<IntegerType>(I->getType())->getBitWidth())
+      return getNoopInput(Op, TLI);
+  }
+
+
+  // Otherwise it's not something we can look through.
+  return V;
+}
+
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
+  const Instruction *I = CS.getInstruction();
+  const BasicBlock *ExitBB = I->getParent();
+  const TerminatorInst *Term = ExitBB->getTerminator();
+  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+
+  // The block must end in a return statement or unreachable.
+  //
+  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+  // an unreachable, for now. The way tailcall optimization is currently
+  // implemented means it will add an epilogue followed by a jump. That is
+  // not profitable. Also, if the callee is a special function (e.g.
+  // longjmp on x86), it can end up causing miscompilation that has not
+  // been fully understood.
+  if (!Ret &&
+      (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+       !isa<UnreachableInst>(Term)))
+    return false;
+
+  // If I will have a chain, make sure no other instruction that will have a
+  // chain interposes between I and the return.
+  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+      !isSafeToSpeculativelyExecute(I))
+    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+         --BBI) {
+      if (&*BBI == I)
+        break;
+      // Debug info intrinsics do not get in the way of tail call optimization.
+      if (isa<DbgInfoIntrinsic>(BBI))
+        continue;
+      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+          !isSafeToSpeculativelyExecute(BBI))
+        return false;
+    }
+
+  // If the block ends with a void return or unreachable, it doesn't matter
+  // what the call's return type is.
+  if (!Ret || Ret->getNumOperands() == 0) return true;
+
+  // If the return value is undef, it doesn't matter what the call's
+  // return type is.
+  if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+  // Conservatively require the return attributes of the call site to match
+  // those of the caller's return. Ignore noalias because it doesn't affect
+  // the call sequence.
+  const Function *F = ExitBB->getParent();
+  AttributeSet CallerAttrs = F->getAttributes();
+  AttributeSet CalleeAttrs = CS.getAttributes();
+  if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
+        removeAttribute(Attribute::NoAlias) !=
+      AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+        removeAttribute(Attribute::NoAlias))
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+      CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+    return false;
+
+  // Otherwise, make sure the unmodified return value of I is the return value.
+  // We handle two cases: multiple return values + scalars.
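+  // Scalar case: the return value must be (a no-op cast of) the call
+  // itself. Aggregate case: every element of the returned struct must be
+  // an extractvalue of the matching index from the call.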
+ Value *RetVal = Ret->getOperand(0); + if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType())) + // Handle scalars first. + return getNoopInput(Ret->getOperand(0), TLI) == I; + + // If this is an aggregate return, look through the insert/extract values and + // see if each is transparent. + for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements(); + i != e; ++i) { + const Value *InScalar = FindInsertedValue(RetVal, i); + if (InScalar == 0) return false; + InScalar = getNoopInput(InScalar, TLI); + + // If the scalar value being inserted is an extractvalue of the right index + // from the call, then everything is good. + const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar); + if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 || + EVI->getIndices()[0] != i) + return false; + } + + return true; +} diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h new file mode 100644 index 0000000..df47f98 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h @@ -0,0 +1,71 @@ +//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AntiDepBreaker class, which implements +// anti-dependence breaking heuristics for post-register-allocation scheduling. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H +#define LLVM_CODEGEN_ANTIDEPBREAKER_H + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <vector> + +namespace llvm { + +/// AntiDepBreaker - This class works into conjunction with the +/// post-RA scheduler to rename registers to break register +/// anti-dependencies. +class AntiDepBreaker { +public: + typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > + DbgValueVector; + + virtual ~AntiDepBreaker(); + + /// Start - Initialize anti-dep breaking for a new basic block. + virtual void StartBlock(MachineBasicBlock *BB) =0; + + /// BreakAntiDependencies - Identifiy anti-dependencies within a + /// basic-block region and break them by renaming registers. Return + /// the number of anti-dependencies broken. + /// + virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues) = 0; + + /// Observe - Update liveness information to account for the current + /// instruction, which will not be scheduled. + /// + virtual void Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) =0; + + /// Finish - Finish anti-dep breaking for a basic block. + virtual void FinishBlock() =0; + + /// UpdateDbgValue - Update DBG_VALUE if dependency breaker is updating + /// other machine instruction to use NewReg. 
+  void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
+    assert(MI->isDebugValue() && "MI is not DBG_VALUE!");
+    if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg)
+      MI->getOperand(0).setReg(NewReg);
+  }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
new file mode 100644
index 0000000..188047d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -0,0 +1,136 @@
+//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
+  cl::desc("Generate ARM EHABI tables with unwinding descriptors"),
+  cl::init(false));
+
+
+ARMException::ARMException(AsmPrinter *A)
+  : DwarfException(A) {}
+
+ARMException::~ARMException() {}
+
+void ARMException::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void ARMException::BeginFunction(const MachineFunction *MF) {
+  Asm->OutStreamer.EmitFnStart();
+  if (Asm->MF->getFunction()->needsUnwindTableEntry())
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                  Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void ARMException::EndFunction() {
+  if (!Asm->MF->getFunction()->needsUnwindTableEntry())
+    Asm->OutStreamer.EmitCantUnwind();
+  else {
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+                                                  Asm->getFunctionNumber()));
+
+    if (EnableARMEHABIDescriptors) {
+      // Map all labels and get rid of any dead landing pads.
+      MMI->TidyLandingPads();
+
+      if (!MMI->getLandingPads().empty()) {
+        // Emit references to personality.
+        if (const Function *Personality =
+            MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+          MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+          Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+          Asm->OutStreamer.EmitPersonality(PerSym);
+        }
+
+        // Emit .handlerdata directive.
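+        // Editor's sketch of the resulting ARM EHABI output (illustrative,
+        // not from the original source):
+        //   .personality __gxx_personality_v0
+        //   .handlerdata
+        //   ... language-specific exception table ...
+        //   .fnend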
+ Asm->OutStreamer.EmitHandlerData(); + + // Emit actual exception table + EmitExceptionTable(); + } + } + } + + Asm->OutStreamer.EmitFnEnd(); +} + +void ARMException::EmitTypeInfos(unsigned TTypeEncoding) { + const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + + bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + + int Entry = 0; + // Emit the Catch TypeInfos. + if (VerboseAsm && !TypeInfos.empty()) { + Asm->OutStreamer.AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer.AddBlankLine(); + Entry = TypeInfos.size(); + } + + for (std::vector<const GlobalVariable *>::const_reverse_iterator + I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { + const GlobalVariable *GV = *I; + if (VerboseAsm) + Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); + Asm->EmitTTypeReference(GV, TTypeEncoding); + } + + // Emit the Exception Specifications. + if (VerboseAsm && !FilterIds.empty()) { + Asm->OutStreamer.AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer.AddBlankLine(); + Entry = 0; + } + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { + unsigned TypeID = *I; + if (VerboseAsm) { + --Entry; + if (TypeID != 0) + Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); + } + + Asm->EmitTTypeReference((TypeID == 0 ? 0 : TypeInfos[TypeID - 1]), + TTypeEncoding); + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp new file mode 100644 index 0000000..d4a745d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -0,0 +1,2170 @@ +//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AsmPrinter class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "llvm/CodeGen/AsmPrinter.h" +#include "DwarfDebug.h" +#include "DwarfException.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Timer.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +static const char *DWARFGroupName = "DWARF Emission"; +static const char *DbgTimerName = "DWARF Debug Writer"; +static const char *EHTimerName = "DWARF Exception Writer"; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +char AsmPrinter::ID = 0; + +typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type; +static gcp_map_type &getGCMap(void *&P) { + if (P == 0) + P = new gcp_map_type(); + return *(gcp_map_type*)P; +} + + +/// getGVAlignmentLog2 - Return the alignment to use for the specified global +/// value in log2 form. This rounds up to the preferred alignment if possible +/// and legal. +static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, + unsigned InBits = 0) { + unsigned NumBits = 0; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + NumBits = TD.getPreferredAlignmentLog(GVar); + + // If InBits is specified, round it to it. + if (InBits > NumBits) + NumBits = InBits; + + // If the GV has a specified alignment, take it into account. + if (GV->getAlignment() == 0) + return NumBits; + + unsigned GVAlign = Log2_32(GV->getAlignment()); + + // If the GVAlign is larger than NumBits, or if we are required to obey + // NumBits because the GV has an assigned section, obey it. 
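+  //
+  // For example (editor's illustration, not in the original): a global
+  // declared with "align 16" (GVAlign == 4 in log2 form) wins over a
+  // requested InBits of 3, while "align 2" on a global with no section
+  // keeps the larger of the preferred alignment and InBits.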
+ if (GVAlign > NumBits || GV->hasSection()) + NumBits = GVAlign; + return NumBits; +} + +AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) + : MachineFunctionPass(ID), + TM(tm), MAI(tm.getMCAsmInfo()), + OutContext(Streamer.getContext()), + OutStreamer(Streamer), + LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { + DD = 0; DE = 0; MMI = 0; LI = 0; + CurrentFnSym = CurrentFnSymForSize = 0; + GCMetadataPrinters = 0; + VerboseAsm = Streamer.isVerboseAsm(); +} + +AsmPrinter::~AsmPrinter() { + assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized"); + + if (GCMetadataPrinters != 0) { + gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); + + for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I) + delete I->second; + delete &GCMap; + GCMetadataPrinters = 0; + } + + delete &OutStreamer; +} + +/// getFunctionNumber - Return a unique ID for the current function. +/// +unsigned AsmPrinter::getFunctionNumber() const { + return MF->getFunctionNumber(); +} + +const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { + return TM.getTargetLowering()->getObjFileLowering(); +} + +/// getDataLayout - Return information about data layout. +const DataLayout &AsmPrinter::getDataLayout() const { + return *TM.getDataLayout(); +} + +/// getCurrentSection() - Return the current section we are emitting to. +const MCSection *AsmPrinter::getCurrentSection() const { + return OutStreamer.getCurrentSection(); +} + + + +void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<GCModuleInfo>(); + if (isVerbose()) + AU.addRequired<MachineLoopInfo>(); +} + +bool AsmPrinter::doInitialization(Module &M) { + OutStreamer.InitStreamer(); + + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MMI->AnalyzeModule(M); + + // Initialize TargetLoweringObjectFile. + const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) + .Initialize(OutContext, TM); + + Mang = new Mangler(OutContext, *TM.getDataLayout()); + + // Allow the target to emit any magic that it wants at the start of the file. + EmitStartOfAsmFile(M); + + // Very minimal debug info. It is ignored if we emit actual debug info. If we + // don't, this at least helps the user find where a global came from. + if (MAI->hasSingleParameterDotFile()) { + // .file "foo.c" + OutStreamer.EmitFileDirective(M.getModuleIdentifier()); + } + + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) + MP->beginAssembly(*this); + + // Emit module-level inline asm if it exists. 
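+  // For instance (editor's example), IR such as
+  //   module asm ".symver foo_impl, foo@@VERS_1"
+  // is passed through verbatim between the comment markers emitted below.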
+ if (!M.getModuleInlineAsm().empty()) { + OutStreamer.AddComment("Start of file scope inline assembly"); + OutStreamer.AddBlankLine(); + EmitInlineAsm(M.getModuleInlineAsm()+"\n"); + OutStreamer.AddComment("End of file scope inline assembly"); + OutStreamer.AddBlankLine(); + } + + if (MAI->doesSupportDebugInformation()) + DD = new DwarfDebug(this, &M); + + switch (MAI->getExceptionHandlingType()) { + case ExceptionHandling::None: + return false; + case ExceptionHandling::SjLj: + case ExceptionHandling::DwarfCFI: + DE = new DwarfCFIException(this); + return false; + case ExceptionHandling::ARM: + DE = new ARMException(this); + return false; + case ExceptionHandling::Win64: + DE = new Win64Exception(this); + return false; + } + + llvm_unreachable("Unknown exception type."); +} + +void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { + switch ((GlobalValue::LinkageTypes)Linkage) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceODRAutoHideLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::LinkerPrivateWeakLinkage: + if (MAI->getWeakDefDirective() != 0) { + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + + if ((GlobalValue::LinkageTypes)Linkage != + GlobalValue::LinkOnceODRAutoHideLinkage) + // .weak_definition _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + else + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); + } else if (MAI->getLinkOnceDirective() != 0) { + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + //NOTE: linkonce is handled by the section the symbol was assigned to. + } else { + // .weak _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); + } + break; + case GlobalValue::DLLExportLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol. + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + break; + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + case GlobalValue::LinkerPrivateLinkage: + break; + default: + llvm_unreachable("Unknown linkage type!"); + } +} + + +/// EmitGlobalVariable - Emit the specified global variable to the .s file. +void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + if (GV->hasInitializer()) { + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GV)) + return; + + if (isVerbose()) { + WriteAsOperand(OutStreamer.GetCommentOS(), GV, + /*PrintType=*/false, GV->getParent()); + OutStreamer.GetCommentOS() << '\n'; + } + } + + MCSymbol *GVSym = Mang->getSymbol(GV); + EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); + + if (!GV->hasInitializer()) // External globals require no extra code. + return; + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + + const DataLayout *TD = TM.getDataLayout(); + uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + + // If the alignment is specified, we *must* obey it. 
+  // Overaligning a global with a specified alignment is a prompt way to break
+  // globals emitted to sections and expected to be contiguous (e.g. ObjC
+  // metadata).
+  unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
+
+  // Handle common and BSS local symbols (.lcomm).
+  if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+    if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+    unsigned Align = 1 << AlignLog;
+
+    // Handle common symbols.
+    if (GVKind.isCommon()) {
+      if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+        Align = 0;
+
+      // .comm _foo, 42, 4
+      OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+      return;
+    }
+
+    // Handle local BSS symbols.
+    if (MAI->hasMachoZeroFillDirective()) {
+      const MCSection *TheSection =
+        getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+      // .zerofill __DATA, __bss, _foo, 400, 5
+      OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align);
+      return;
+    }
+
+    // Use .lcomm only if it supports user-specified alignment.
+    // Otherwise, while it would still be correct to use .lcomm in some
+    // cases (e.g. when Align == 1), the external assembler might enforce
+    // some -unknown- default alignment behavior, which could cause
+    // spurious differences between external and integrated assembler.
+    // Prefer to simply fall back to .local / .comm in this case.
+    if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
+      // .lcomm _foo, 42
+      OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
+      return;
+    }
+
+    if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+      Align = 0;
+
+    // .local _foo
+    OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
+    // .comm _foo, 42, 4
+    OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+    return;
+  }
+
+  const MCSection *TheSection =
+    getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+
+  // Handle the zerofill directive on darwin, which is a special form of BSS
+  // emission.
+  if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+    if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
+
+    // .globl _foo
+    OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+    // .zerofill __DATA, __common, _foo, 400, 5
+    OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+    return;
+  }
+
+  // Handle thread local data for mach-o which requires us to output an
+  // additional structure of data and mangle the original symbol so that we
+  // can reference it later.
+  //
+  // TODO: This should become an "emit thread local global" method on TLOF.
+  // All of this macho specific stuff should be sunk down into TLOFMachO and
+  // stuff like "TLSExtraDataSection" should no longer be part of the parent
+  // TLOF class. This will also make it more obvious that stuff like
+  // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+  // specific code.
+  if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+    // Emit the .tbss symbol
+    MCSymbol *MangSym =
+      OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+
+    if (GVKind.isThreadBSS())
+      OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+    else if (GVKind.isThreadData()) {
+      OutStreamer.SwitchSection(TheSection);
+
+      EmitAlignment(AlignLog, GV);
+      OutStreamer.EmitLabel(MangSym);
+
+      EmitGlobalConstant(GV->getInitializer());
+    }
+
+    OutStreamer.AddBlankLine();
+
+    // Emit the variable struct for the runtime.
+    const MCSection *TLVSect
+      = getObjFileLowering().getTLSExtraDataSection();
+
+    OutStreamer.SwitchSection(TLVSect);
+    // Emit the linkage here.
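+    // Editor's sketch of the Mach-O TLV descriptor emitted below
+    // (illustrative syntax, not from the original source):
+    //   .section __DATA,__thread_vars
+    //   _x:
+    //     .quad __tlv_bootstrap
+    //     .quad 0
+    //     .quad _x$tlv$init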
+ EmitLinkage(GV->getLinkage(), GVSym); + OutStreamer.EmitLabel(GVSym); + + // Three pointers in size: + // - __tlv_bootstrap - used to make sure support exists + // - spare pointer, used when mapped by the runtime + // - pointer to mangled symbol above with initializer + unsigned PtrSize = TD->getPointerSizeInBits()/8; + OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), + PtrSize); + OutStreamer.EmitIntValue(0, PtrSize); + OutStreamer.EmitSymbolValue(MangSym, PtrSize); + + OutStreamer.AddBlankLine(); + return; + } + + OutStreamer.SwitchSection(TheSection); + + EmitLinkage(GV->getLinkage(), GVSym); + EmitAlignment(AlignLog, GV); + + OutStreamer.EmitLabel(GVSym); + + EmitGlobalConstant(GV->getInitializer()); + + if (MAI->hasDotTypeDotSizeDirective()) + // .size foo, 42 + OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); + + OutStreamer.AddBlankLine(); +} + +/// EmitFunctionHeader - This method emits the header for the current +/// function. +void AsmPrinter::EmitFunctionHeader() { + // Print out constants referenced by the function + EmitConstantPool(); + + // Print the 'header' of function. + const Function *F = MF->getFunction(); + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + EmitVisibility(CurrentFnSym, F->getVisibility()); + + EmitLinkage(F->getLinkage(), CurrentFnSym); + EmitAlignment(MF->getAlignment(), F); + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); + + if (isVerbose()) { + WriteAsOperand(OutStreamer.GetCommentOS(), F, + /*PrintType=*/false, F->getParent()); + OutStreamer.GetCommentOS() << '\n'; + } + + // Emit the CurrentFnSym. This is a virtual function to allow targets to + // do their wild and crazy things as required. + EmitFunctionEntryLabel(); + + // If the function had address-taken blocks that got deleted, then we have + // references to the dangling symbols. Emit them at the start of the function + // so that we don't get references to undefined symbols. + std::vector<MCSymbol*> DeadBlockSyms; + MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms); + for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { + OutStreamer.AddComment("Address taken block that was later removed"); + OutStreamer.EmitLabel(DeadBlockSyms[i]); + } + + // Add some workaround for linkonce linkage on Cygwin\MinGW. + if (MAI->getLinkOnceDirective() != 0 && + (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) { + // FIXME: What is this? + MCSymbol *FakeStub = + OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+ + CurrentFnSym->getName()); + OutStreamer.EmitLabel(FakeStub); + } + + // Emit pre-function debug and/or EH information. + if (DE) { + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->BeginFunction(MF); + } + if (DD) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->beginFunction(MF); + } +} + +/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the +/// function. This can be overridden by targets as required to do custom stuff. +void AsmPrinter::EmitFunctionEntryLabel() { + // The function label could have already been emitted if two symbols end up + // conflicting due to asm renaming. Detect this and emit an error. 
+ if (CurrentFnSym->isUndefined()) + return OutStreamer.EmitLabel(CurrentFnSym); + + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' label emitted multiple times to assembly file"); +} + +/// emitComments - Pretty-print comments for instructions. +static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetMachine &TM = MF->getTarget(); + + // Check for spills and reloads + int FI; + + const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + + // We assume a single instruction only has a spill or reload, not + // both. + const MachineMemOperand *MMO; + if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); + CommentOS << MMO->getSize() << "-byte Reload\n"; + } + } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) + CommentOS << MMO->getSize() << "-byte Folded Reload\n"; + } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); + CommentOS << MMO->getSize() << "-byte Spill\n"; + } + } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) + CommentOS << MMO->getSize() << "-byte Folded Spill\n"; + } + + // Check for spill-induced copies + if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) + CommentOS << " Reload Reuse\n"; +} + +/// emitImplicitDef - This method emits the specified machine instruction +/// that is an implicit def. +static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { + unsigned RegNo = MI->getOperand(0).getReg(); + AP.OutStreamer.AddComment(Twine("implicit-def: ") + + AP.TM.getRegisterInfo()->getName(RegNo)); + AP.OutStreamer.AddBlankLine(); +} + +static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { + std::string Str = "kill:"; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &Op = MI->getOperand(i); + assert(Op.isReg() && "KILL instruction must have only register operands"); + Str += ' '; + Str += AP.TM.getRegisterInfo()->getName(Op.getReg()); + Str += (Op.isDef() ? "<def>" : "<kill>"); + } + AP.OutStreamer.AddComment(Str); + AP.OutStreamer.AddBlankLine(); +} + +/// emitDebugValueComment - This method handles the target-independent form +/// of DBG_VALUE, returning true if it was able to do so. A false return +/// means the target will need to handle MI in EmitInstruction. +static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { + // This code handles only the 3-operand target-independent form. + if (MI->getNumOperands() != 3) + return false; + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: "; + + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); + if (V.getContext().isSubprogram()) + OS << DISubprogram(V.getContext()).getDisplayName() << ":"; + OS << V.getName() << " <- "; + + // Register or immediate value. Register 0 means undef. 
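+  // The finished comment ends up looking like, e.g. (editor's illustration):
+  //   ## DEBUG_VALUE: foo:x <- RDI+0
+  // with "undef" in place of the register when operand 0 is register 0.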
+ if (MI->getOperand(0).isFPImm()) { + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { + OS << (double)APF.convertToFloat(); + } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { + OS << APF.convertToDouble(); + } else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + OS << "(long double) " << APF.convertToDouble(); + } + } else if (MI->getOperand(0).isImm()) { + OS << MI->getOperand(0).getImm(); + } else if (MI->getOperand(0).isCImm()) { + MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); + } else { + assert(MI->getOperand(0).isReg() && "Unknown operand type"); + if (MI->getOperand(0).getReg() == 0) { + // Suppress offset, it is not meaningful here. + OS << "undef"; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(OS.str()); + return true; + } + OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + } + + OS << '+' << MI->getOperand(1).getImm(); + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(OS.str()); + return true; +} + +AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() { + if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI && + MF->getFunction()->needsUnwindTableEntry()) + return CFI_M_EH; + + if (MMI->hasDebugInfo()) + return CFI_M_Debug; + + return CFI_M_None; +} + +bool AsmPrinter::needsSEHMoves() { + return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 && + MF->getFunction()->needsUnwindTableEntry(); +} + +bool AsmPrinter::needsRelocationsForDwarfStringPool() const { + return MAI->doesDwarfUseRelocationsAcrossSections(); +} + +void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { + MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + + if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + return; + + if (needsCFIMoves() == CFI_M_None) + return; + + if (MMI->getCompactUnwindEncoding() != 0) + OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding()); + + MachineModuleInfo &MMI = MF->getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + bool FoundOne = false; + (void)FoundOne; + for (std::vector<MachineMove>::iterator I = Moves.begin(), + E = Moves.end(); I != E; ++I) { + if (I->getLabel() == Label) { + EmitCFIFrameMove(*I); + FoundOne = true; + } + } + assert(FoundOne); +} + +/// EmitFunctionBody - This method emits the body and trailer for a +/// function. +void AsmPrinter::EmitFunctionBody() { + // Emit target-specific gunk before the function body. + EmitFunctionBodyStart(); + + bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo(); + + // Print out code for the function. + bool HasAnyRealCode = false; + const MachineInstr *LastMI = 0; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + // Print a label for the basic block. + EmitBasicBlockStart(I); + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + LastMI = II; + + // Print the assembly for the instruction. 
+ if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() && + !II->isDebugValue()) { + HasAnyRealCode = true; + ++EmittedInsts; + } + + if (ShouldPrintDebugScopes) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->beginInstruction(II); + } + + if (isVerbose()) + emitComments(*II, OutStreamer.GetCommentOS()); + + switch (II->getOpcode()) { + case TargetOpcode::PROLOG_LABEL: + emitPrologLabel(*II); + break; + + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); + break; + case TargetOpcode::INLINEASM: + EmitInlineAsm(II); + break; + case TargetOpcode::DBG_VALUE: + if (isVerbose()) { + if (!emitDebugValueComment(II, *this)) + EmitInstruction(II); + } + break; + case TargetOpcode::IMPLICIT_DEF: + if (isVerbose()) emitImplicitDef(II, *this); + break; + case TargetOpcode::KILL: + if (isVerbose()) emitKill(II, *this); + break; + default: + if (!TM.hasMCUseLoc()) + MCLineEntry::Make(&OutStreamer, getCurrentSection()); + + EmitInstruction(II); + break; + } + + if (ShouldPrintDebugScopes) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endInstruction(II); + } + } + } + + // If the last instruction was a prolog label, then we have a situation where + // we emitted a prolog but no function body. This results in the ending prolog + // label equaling the end of function label and an invalid "row" in the + // FDE. We need to emit a noop in this situation so that the FDE's rows are + // valid. + bool RequiresNoop = LastMI && LastMI->isPrologLabel(); + + // If the function is empty and the object file uses .subsections_via_symbols, + // then we need to emit *something* to the function body to prevent the + // labels from collapsing together. Just emit a noop. + if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) { + MCInst Noop; + TM.getInstrInfo()->getNoopForMachoTarget(Noop); + if (Noop.getOpcode()) { + OutStreamer.AddComment("avoids zero-length function"); + OutStreamer.EmitInstruction(Noop); + } else // Target not mc-ized yet. + OutStreamer.EmitRawText(StringRef("\tnop\n")); + } + + const Function *F = MF->getFunction(); + for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) { + const BasicBlock *BB = i; + if (!BB->hasAddressTaken()) + continue; + MCSymbol *Sym = GetBlockAddressSymbol(BB); + if (Sym->isDefined()) + continue; + OutStreamer.AddComment("Address of block that was removed by CodeGen"); + OutStreamer.EmitLabel(Sym); + } + + // Emit target-specific gunk after the function body. + EmitFunctionBodyEnd(); + + // If the target wants a .size directive for the size of the function, emit + // it. + if (MAI->hasDotTypeDotSizeDirective()) { + // Create a symbol for the end of function, so we can get the size as + // difference between the function label and the temp label. + MCSymbol *FnEndLabel = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(FnEndLabel); + + const MCExpr *SizeExp = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), + MCSymbolRefExpr::Create(CurrentFnSymForSize, + OutContext), + OutContext); + OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); + } + + // Emit post-function debug information. 
+ if (DD) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endFunction(MF); + } + if (DE) { + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->EndFunction(); + } + MMI->EndFunction(); + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(); + + OutStreamer.AddBlankLine(); +} + +/// getDebugValueLocation - Get location information encoded by DBG_VALUE +/// operands. +MachineLocation AsmPrinter:: +getDebugValueLocation(const MachineInstr *MI) const { + // Target specific DBG_VALUE instructions are handled by each target. + return MachineLocation(); +} + +/// EmitDwarfRegOp - Emit dwarf register operation. +void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + + for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0; + ++SR) { + Reg = TRI->getDwarfRegNum(*SR, false); + // FIXME: Get the bit range this register uses of the superregister + // so that we can produce a DW_OP_bit_piece + } + + // FIXME: Handle cases like a super register being encoded as + // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33 + + // FIXME: We have no reasonable way of handling errors in here. The + // caller might be in the middle of an dwarf expression. We should + // probably assert that Reg >= 0 once debug info generation is more mature. + + if (int Offset = MLoc.getOffset()) { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); + EmitInt8(dwarf::DW_OP_breg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_bregx"); + EmitInt8(dwarf::DW_OP_bregx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + EmitSLEB128(Offset); + } else { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); + EmitInt8(dwarf::DW_OP_reg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_regx"); + EmitInt8(dwarf::DW_OP_regx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + } + + // FIXME: Produce a DW_OP_bit_piece if we used a superregister +} + +bool AsmPrinter::doFinalization(Module &M) { + // Emit global variables. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + EmitGlobalVariable(I); + + // Emit visibility info for declarations + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { + const Function &F = *I; + if (!F.isDeclaration()) + continue; + GlobalValue::VisibilityTypes V = F.getVisibility(); + if (V == GlobalValue::DefaultVisibility) + continue; + + MCSymbol *Name = Mang->getSymbol(&F); + EmitVisibility(Name, V, false); + } + + // Emit module flags. + SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + if (!ModuleFlags.empty()) + getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM); + + // Finalize debug and EH information. + if (DE) { + { + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->EndModule(); + } + delete DE; DE = 0; + } + if (DD) { + { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endModule(); + } + delete DD; DD = 0; + } + + // If the target wants to know about weak references, print them all. + if (MAI->getWeakRefDirective()) { + // FIXME: This is not lazy, it would be nice to only print weak references + // to stuff that is actually used. 
+    // Note that doing so would require targets to notice uses in operands
+    // (due to constant exprs etc). This should happen with the MC stuff
+    // eventually.
+
+    // Print out module-level global variables here.
+    for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+         I != E; ++I) {
+      if (!I->hasExternalWeakLinkage()) continue;
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+    }
+
+    for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+      if (!I->hasExternalWeakLinkage()) continue;
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+    }
+  }
+
+  if (MAI->hasSetDirective()) {
+    OutStreamer.AddBlankLine();
+    for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+         I != E; ++I) {
+      MCSymbol *Name = Mang->getSymbol(I);
+
+      const GlobalValue *GV = I->getAliasedGlobal();
+      MCSymbol *Target = Mang->getSymbol(GV);
+
+      if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
+        OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+      else if (I->hasWeakLinkage())
+        OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+      else
+        assert(I->hasLocalLinkage() && "Invalid alias linkage");
+
+      EmitVisibility(Name, I->getVisibility());
+
+      // Emit the directives as assignments aka .set:
+      OutStreamer.EmitAssignment(Name,
+                                 MCSymbolRefExpr::Create(Target, OutContext));
+    }
+  }
+
+  GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+  for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+    if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+      MP->finishAssembly(*this);
+
+  // If we don't have any trampolines, then we don't require stack memory
+  // to be executable. Some targets have a directive to declare this.
+  Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+  if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+    if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+      OutStreamer.SwitchSection(S);
+
+  // Allow the target to emit any magic that it wants at the end of the file,
+  // after everything else has gone out.
+  EmitEndOfAsmFile(M);
+
+  delete Mang; Mang = 0;
+  MMI = 0;
+
+  OutStreamer.Finish();
+  OutStreamer.reset();
+
+  return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+  this->MF = &MF;
+  // Get the function symbol.
+  CurrentFnSym = Mang->getSymbol(MF.getFunction());
+  CurrentFnSymForSize = CurrentFnSym;
+
+  if (isVerbose())
+    LI = &getAnalysis<MachineLoopInfo>();
+}
+
+namespace {
+  // SectionCPs - Keep track of the alignment and the constant-pool entries
+  // emitted into each section.
+  struct SectionCPs {
+    const MCSection *S;
+    unsigned Alignment;
+    SmallVector<unsigned, 4> CPEs;
+    SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
+  };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool() {
+  const MachineConstantPool *MCP = MF->getConstantPool();
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // Calculate sections for constant pool entries. We collect entries that go
+  // into the same section together to reduce the number of section switches.
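+  // For example (editor's note): 4-byte constants without relocations are
+  // grouped into a mergeable-const-4 section, while entries whose
+  // initializers need relocations are routed to read-only-with-relocation
+  // sections by the switch below.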
+  SmallVector<SectionCPs, 4> CPSections;
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    const MachineConstantPoolEntry &CPE = CP[i];
+    unsigned Align = CPE.getAlignment();
+
+    SectionKind Kind;
+    switch (CPE.getRelocationInfo()) {
+    default: llvm_unreachable("Unknown section kind");
+    case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+    case 1:
+      Kind = SectionKind::getReadOnlyWithRelLocal();
+      break;
+    case 0:
+      switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) {
+      case 4:  Kind = SectionKind::getMergeableConst4(); break;
+      case 8:  Kind = SectionKind::getMergeableConst8(); break;
+      case 16: Kind = SectionKind::getMergeableConst16(); break;
+      default: Kind = SectionKind::getMergeableConst(); break;
+      }
+    }
+
+    const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+
+    // The number of sections is small; just do a linear search from the
+    // last section to the first.
+    bool Found = false;
+    unsigned SecIdx = CPSections.size();
+    while (SecIdx != 0) {
+      if (CPSections[--SecIdx].S == S) {
+        Found = true;
+        break;
+      }
+    }
+    if (!Found) {
+      SecIdx = CPSections.size();
+      CPSections.push_back(SectionCPs(S, Align));
+    }
+
+    if (Align > CPSections[SecIdx].Alignment)
+      CPSections[SecIdx].Alignment = Align;
+    CPSections[SecIdx].CPEs.push_back(i);
+  }
+
+  // Now print stuff into the calculated sections.
+  for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+    OutStreamer.SwitchSection(CPSections[i].S);
+    EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+    unsigned Offset = 0;
+    for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+      unsigned CPI = CPSections[i].CPEs[j];
+      MachineConstantPoolEntry CPE = CP[CPI];
+
+      // Emit inter-object padding for alignment.
+      unsigned AlignMask = CPE.getAlignment() - 1;
+      unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+      OutStreamer.EmitZeros(NewOffset - Offset);
+
+      Type *Ty = CPE.getType();
+      Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
+      OutStreamer.EmitLabel(GetCPISymbol(CPI));
+
+      if (CPE.isMachineConstantPoolEntry())
+        EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+      else
+        EmitGlobalConstant(CPE.Val.ConstVal);
+    }
+  }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo() {
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  if (MJTI == 0) return;
+  if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  // Pick the directive to use to print the jump table entries, and switch to
+  // the appropriate section.
+  const Function *F = MF->getFunction();
+  bool JTInDiffSection = false;
+  if (// In PIC mode, we need to emit the jump table to the same section as the
+      // function body itself, otherwise the label differences won't make sense.
+      // FIXME: Need a better predicate for this: what about custom entries?
+      MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+      // We should also do this if the section name is NULL or if the function
+      // is declared in a discardable section.
+      // FIXME: this isn't the right predicate, should be based on the MCSection
+      // for the function.
+      F->isWeakForLinker()) {
+    OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+  } else {
+    // Otherwise, drop it in the readonly section.
+ const MCSection *ReadOnlySection = + getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly()); + OutStreamer.SwitchSection(ReadOnlySection); + JTInDiffSection = true; + } + + EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout()))); + + // Jump tables in code sections are marked with a data_region directive + // where that's supported. + if (!JTInDiffSection) + OutStreamer.EmitDataRegion(MCDR_DataRegionJT32); + + for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + + // If this jump table was deleted, ignore it. + if (JTBBs.empty()) continue; + + // For the EK_LabelDifference32 entry, if the target supports .set, emit a + // .set directive for each unique entry. This reduces the number of + // relocations the assembler will generate for the jump table. + if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && + MAI->hasSetDirective()) { + SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; + const TargetLowering *TLI = TM.getTargetLowering(); + const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { + const MachineBasicBlock *MBB = JTBBs[ii]; + if (!EmittedSets.insert(MBB)) continue; + + // .set LJTSet, LBB32-base + const MCExpr *LHS = + MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), + MCBinaryExpr::CreateSub(LHS, Base, OutContext)); + } + } + + // On some targets (e.g. Darwin) we want to emit two consecutive labels + // before each jump table. The first label is never referenced, but tells + // the assembler and linker the extents of the jump table object. The + // second label is actually referenced by the code. + if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0]) + // FIXME: This doesn't have to have any specific name, just any randomly + // named and numbered 'l' label would work. Simplify GetJTISymbol. + OutStreamer.EmitLabel(GetJTISymbol(JTI, true)); + + OutStreamer.EmitLabel(GetJTISymbol(JTI)); + + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) + EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); + } + if (!JTInDiffSection) + OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); +} + +/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the +/// current stream. 
+void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned UID) const { + assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block"); + const MCExpr *Value = 0; + switch (MJTI->getEntryKind()) { + case MachineJumpTableInfo::EK_Inline: + llvm_unreachable("Cannot emit EK_Inline jump table entry"); + case MachineJumpTableInfo::EK_Custom32: + Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID, + OutContext); + break; + case MachineJumpTableInfo::EK_BlockAddress: + // EK_BlockAddress - Each entry is a plain address of block, e.g.: + // .word LBB123 + Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + break; + case MachineJumpTableInfo::EK_GPRel32BlockAddress: { + // EK_GPRel32BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gprel32 LBB123 + MCSymbol *MBBSym = MBB->getSymbol(); + OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + return; + } + + case MachineJumpTableInfo::EK_GPRel64BlockAddress: { + // EK_GPRel64BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gpdword LBB123 + MCSymbol *MBBSym = MBB->getSymbol(); + OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + return; + } + + case MachineJumpTableInfo::EK_LabelDifference32: { + // EK_LabelDifference32 - Each entry is the address of the block minus + // the address of the jump table. This is used for PIC jump tables where + // gprel32 is not supported. e.g.: + // .word LBB123 - LJTI1_2 + // If the .set directive is supported, this is emitted as: + // .set L4_5_set_123, LBB123 - LJTI1_2 + // .word L4_5_set_123 + + // If we have emitted set directives for the jump table entries, print + // them rather than the entries themselves. If we're emitting PIC, then + // emit the table entries as differences between two text section labels. + if (MAI->hasSetDirective()) { + // If we used .set, reference the .set's symbol. + Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()), + OutContext); + break; + } + // Otherwise, use the difference as the jump table entry. + Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext); + Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext); + break; + } + } + + assert(Value && "Unknown entry kind!"); + + unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout()); + OutStreamer.EmitValue(Value, EntrySize); +} + + +/// EmitSpecialLLVMGlobal - Check to see if the specified global is a +/// special global used by LLVM. If so, emit it and return true, otherwise +/// do nothing and return false. +bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + if (GV->getName() == "llvm.used") { + if (MAI->hasNoDeadStrip()) // No need to emit this at all. + EmitLLVMUsedList(GV->getInitializer()); + return true; + } + + // Ignore debug and non-emitted data. This handles llvm.compiler.used. 
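+  // Illustrative IR for the special globals handled here (editor's example,
+  // not from the original source):
+  //   @llvm.used = appending global [1 x i8*]
+  //       [i8* bitcast (void ()* @f to i8*)], section "llvm.metadata"
+  //   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+  //       [{ i32, void ()* } { i32 65535, void ()* @init }]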
+  if (GV->getSection() == "llvm.metadata" ||
+      GV->hasAvailableExternallyLinkage())
+    return true;
+
+  if (!GV->hasAppendingLinkage()) return false;
+
+  assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+  if (GV->getName() == "llvm.global_ctors") {
+    EmitXXStructorList(GV->getInitializer(), /* isCtor */ true);
+
+    if (TM.getRelocationModel() == Reloc::Static &&
+        MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+      StringRef Sym(".constructors_used");
+      OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+                                      MCSA_Reference);
+    }
+    return true;
+  }
+
+  if (GV->getName() == "llvm.global_dtors") {
+    EmitXXStructorList(GV->getInitializer(), /* isCtor */ false);
+
+    if (TM.getRelocationModel() == Reloc::Static &&
+        MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+      StringRef Sym(".destructors_used");
+      OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+                                      MCSA_Reference);
+    }
+    return true;
+  }
+
+  return false;
+}
+
+/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
+/// global in the specified llvm.used list for which emitUsedDirectiveFor
+/// is true, as being used with this directive.
+void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
+  // Should be an array of 'i8*'.
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+  if (InitList == 0) return;
+
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+    const GlobalValue *GV =
+      dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+    if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
+  }
+}
+
+typedef std::pair<unsigned, Constant*> Structor;
+
+static bool priority_order(const Structor& lhs, const Structor& rhs) {
+  return lhs.first < rhs.first;
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list taking into account the
+/// init priority.
+void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
+  // Sanity check the structors list: it should be an array of
+  // '{ int, void ()* }' structs, where the first value is the init priority.
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+  if (!InitList) return; // Not an array!
+  StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+  if (!ETy || ETy->getNumElements() != 2) return; // Not an array of pairs!
+  if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
+      !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
+
+  // Gather the structors in a form that's convenient for sorting by priority.
+  SmallVector<Structor, 8> Structors;
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+    ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
+    if (!CS) continue; // Malformed.
+    if (CS->getOperand(1)->isNullValue())
+      break; // Found a null terminator, skip the rest.
+    ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+    if (!Priority) continue; // Malformed.
+    Structors.push_back(std::make_pair(Priority->getLimitedValue(65535),
+                                       CS->getOperand(1)));
+  }
+
+  // Emit the function pointers in the target-specific order.
+  const DataLayout *TD = TM.getDataLayout();
+  unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+  std::stable_sort(Structors.begin(), Structors.end(), priority_order);
+  for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
+    const MCSection *OutputSection =
+      (isCtor ?
+       getObjFileLowering().getStaticCtorSection(Structors[i].first) :
+       getObjFileLowering().getStaticDtorSection(Structors[i].first));
+    OutStreamer.SwitchSection(OutputSection);
+    if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
+      EmitAlignment(Align);
+    EmitXXStructor(Structors[i].second);
+  }
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+  OutStreamer.EmitIntValue(Value, 1);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+  OutStreamer.EmitIntValue(Value, 2);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+  OutStreamer.EmitIntValue(Value, 4);
+}
+
+/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
+/// in bytes of the directive is specified by Size and Hi/Lo specify the
+/// labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+                                     unsigned Size) const {
+  // Get the Hi-Lo expression.
+  const MCExpr *Diff =
+    MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
+                            MCSymbolRefExpr::Create(Lo, OutContext),
+                            OutContext);
+
+  if (!MAI->hasSetDirective()) {
+    OutStreamer.EmitValue(Diff, Size);
+    return;
+  }
+
+  // Otherwise, emit with .set (aka assignment).
+  MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+  OutStreamer.EmitAssignment(SetLabel, Diff);
+  OutStreamer.EmitSymbolValue(SetLabel, Size);
+}
+
+/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
+/// where the size in bytes of the directive is specified by Size and Hi/Lo
+/// specify the labels. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+                                           const MCSymbol *Lo, unsigned Size)
+  const {
+
+  // Emit Hi+Offset - Lo
+  // Get the Hi+Offset expression.
+  const MCExpr *Plus =
+    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
+                            MCConstantExpr::Create(Offset, OutContext),
+                            OutContext);
+
+  // Get the Hi+Offset-Lo expression.
+  const MCExpr *Diff =
+    MCBinaryExpr::CreateSub(Plus,
+                            MCSymbolRefExpr::Create(Lo, OutContext),
+                            OutContext);
+
+  if (!MAI->hasSetDirective())
+    OutStreamer.EmitValue(Diff, Size);
+  else {
+    // Otherwise, emit with .set (aka assignment).
+    MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+    OutStreamer.EmitAssignment(SetLabel, Diff);
+    OutStreamer.EmitSymbolValue(SetLabel, Size);
+  }
+}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+                                     unsigned Size)
+  const {
+
+  // Emit Label+Offset (or just Label if Offset is zero)
+  const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext);
+  if (Offset)
+    Expr = MCBinaryExpr::CreateAdd(Expr,
+                                   MCConstantExpr::Create(Offset, OutContext),
+                                   OutContext);
+
+  OutStreamer.EmitValue(Expr, Size);
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment.
If a global value is specified, and if that global has +// an explicit alignment requested, it will override the alignment request +// if required for correctness. +// +void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { + if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits); + + if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. + + if (getCurrentSection()->getKind().isText()) + OutStreamer.EmitCodeAlignment(1 << NumBits); + else + OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0); +} + +//===----------------------------------------------------------------------===// +// Constant emission. +//===----------------------------------------------------------------------===// + +/// lowerConstant - Lower the specified LLVM Constant to an MCExpr. +/// +static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { + MCContext &Ctx = AP.OutContext; + + if (CV->isNullValue() || isa<UndefValue>(CV)) + return MCConstantExpr::Create(0, Ctx); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) + return MCConstantExpr::Create(CI->getZExtValue(), Ctx); + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) + return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); + + if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) + return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); + + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); + if (CE == 0) { + llvm_unreachable("Unknown constant value to lower!"); + } + + switch (CE->getOpcode()) { + default: + // If the code isn't optimized, there may be outstanding folding + // opportunities. Attempt to fold the expression using DataLayout as a + // last resort before giving up. + if (Constant *C = + ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) + if (C != CE) + return lowerConstant(C, AP); + + // Otherwise report the problem to the user. + { + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + WriteAsOperand(OS, CE, /*PrintType=*/false, + !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); + } + case Instruction::GetElementPtr: { + const DataLayout &TD = *AP.TM.getDataLayout(); + // Generate a symbolic expression for the byte address + APInt OffsetAI(TD.getPointerSizeInBits(), 0); + cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); + + const MCExpr *Base = lowerConstant(CE->getOperand(0), AP); + if (!OffsetAI) + return Base; + + int64_t Offset = OffsetAI.getSExtValue(); + return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), + Ctx); + } + + case Instruction::Trunc: + // We emit the value and depend on the assembler to truncate the generated + // expression properly. This is important for differences between + // blockaddress labels. Since the two labels are in the same function, it + // is reasonable to treat their delta as a 32-bit value. + // FALL THROUGH. + case Instruction::BitCast: + return lowerConstant(CE->getOperand(0), AP); + + case Instruction::IntToPtr: { + const DataLayout &TD = *AP.TM.getDataLayout(); + // Handle casts to pointers by changing them into casts to the appropriate + // integer type. This promotes constant folding and simplifies this code. 
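+    // E.g. (editor's illustration) inttoptr (i64 42 to i8*) is rewritten as
+    // an integer cast to the pointer-sized integer type and then lowered to
+    // the plain constant 42.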
+ Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), + false/*ZExt*/); + return lowerConstant(Op, AP); + } + + case Instruction::PtrToInt: { + const DataLayout &TD = *AP.TM.getDataLayout(); + // Support only foldable casts to/from pointers that can be eliminated by + // changing the pointer to the appropriately sized integer type. + Constant *Op = CE->getOperand(0); + Type *Ty = CE->getType(); + + const MCExpr *OpExpr = lowerConstant(Op, AP); + + // We can emit the pointer value into this slot if the slot is an + // integer slot equal to the size of the pointer. + if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) + return OpExpr; + + // Otherwise the pointer is smaller than the resultant integer, mask off + // the high bits so we are sure to get a proper truncation if the input is + // a constant expr. + unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); + const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); + return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); + } + + // The MC library also has a right-shift operator, but it isn't consistently + // signed or unsigned between different targets. + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::Shl: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP); + const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP); + switch (CE->getOpcode()) { + default: llvm_unreachable("Unknown binary operator constant cast expr"); + case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); + case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); + case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); + case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); + case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); + case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); + case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); + } + } + } +} + +static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, + AsmPrinter &AP); + +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. +static int isRepeatedByteSequence(const ConstantDataSequential *V) { + StringRef Data = V->getRawDataValues(); + assert(!Data.empty() && "Empty aggregates should be CAZ node"); + char C = Data[0]; + for (unsigned i = 1, e = Data.size(); i != e; ++i) + if (Data[i] != C) return -1; + return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1. +} + + +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. 
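+/// For example (illustrative, not part of the original comment): an i32 with
+/// value 0xABABABAB repeats the byte 0xAB four times, so the caller can emit
+/// it as one .fill directive instead of four explicit bytes.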
+static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getBitWidth() > 64) return -1; + + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType()); + uint64_t Value = CI->getZExtValue(); + + // Make sure the constant is at least 8 bits long and has a power + // of 2 bit width. This guarantees the constant bit width is + // always a multiple of 8 bits, avoiding issues with padding out + // to Size and other such corner cases. + if (CI->getBitWidth() < 8 || !isPowerOf2_64(CI->getBitWidth())) return -1; + + uint8_t Byte = static_cast<uint8_t>(Value); + + for (unsigned i = 1; i < Size; ++i) { + Value >>= 8; + if (static_cast<uint8_t>(Value) != Byte) return -1; + } + return Byte; + } + if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) { + // Make sure all array elements are sequences of the same repeated + // byte. + assert(CA->getNumOperands() != 0 && "Should be a CAZ"); + int Byte = isRepeatedByteSequence(CA->getOperand(0), TM); + if (Byte == -1) return -1; + + for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) { + int ThisByte = isRepeatedByteSequence(CA->getOperand(i), TM); + if (ThisByte == -1) return -1; + if (Byte != ThisByte) return -1; + } + return Byte; + } + + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) + return isRepeatedByteSequence(CDS); + + return -1; +} + +static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, + unsigned AddrSpace,AsmPrinter &AP){ + + // See if we can aggregate this into a .fill, if so, emit it as such. + int Value = isRepeatedByteSequence(CDS, AP.TM); + if (Value != -1) { + uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType()); + // Don't emit a 1-byte object as a .fill. + if (Bytes > 1) + return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + } + + // If this can be emitted with .ascii/.asciz, emit it as such. + if (CDS->isString()) + return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace); + + // Otherwise, emit the values in successive locations. + unsigned ElementByteSize = CDS->getElementByteSize(); + if (isa<IntegerType>(CDS->getElementType())) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", + CDS->getElementAsInteger(i)); + AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), + ElementByteSize, AddrSpace); + } + } else if (ElementByteSize == 4) { + // FP Constants are printed as integer constants to avoid losing + // precision. 
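+    // Illustrative note (not from the original source): the union below
+    // type-puns the IEEE-754 bits, so 1.0f is emitted as the integer
+    // 0x3f800000 rather than a decimal string that might not round-trip.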
+ assert(CDS->getElementType()->isFloatTy()); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { + float F; + uint32_t I; + }; + + F = CDS->getElementAsFloat(i); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; + AP.OutStreamer.EmitIntValue(I, 4, AddrSpace); + } + } else { + assert(CDS->getElementType()->isDoubleTy()); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { + double F; + uint64_t I; + }; + + F = CDS->getElementAsDouble(i); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; + AP.OutStreamer.EmitIntValue(I, 8, AddrSpace); + } + } + + const DataLayout &TD = *AP.TM.getDataLayout(); + unsigned Size = TD.getTypeAllocSize(CDS->getType()); + unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * + CDS->getNumElements(); + if (unsigned Padding = Size - EmittedSize) + AP.OutStreamer.EmitZeros(Padding, AddrSpace); + +} + +static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, + AsmPrinter &AP) { + // See if we can aggregate some values. Make sure it can be + // represented as a series of bytes of the constant value. + int Value = isRepeatedByteSequence(CA, AP.TM); + + if (Value != -1) { + uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType()); + AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + } + else { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) + emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + } +} + +static void emitGlobalConstantVector(const ConstantVector *CV, + unsigned AddrSpace, AsmPrinter &AP) { + for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) + emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); + + const DataLayout &TD = *AP.TM.getDataLayout(); + unsigned Size = TD.getTypeAllocSize(CV->getType()); + unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) * + CV->getType()->getNumElements(); + if (unsigned Padding = Size - EmittedSize) + AP.OutStreamer.EmitZeros(Padding, AddrSpace); +} + +static void emitGlobalConstantStruct(const ConstantStruct *CS, + unsigned AddrSpace, AsmPrinter &AP) { + // Print the fields in successive locations. Pad to align if needed! + const DataLayout *TD = AP.TM.getDataLayout(); + unsigned Size = TD->getTypeAllocSize(CS->getType()); + const StructLayout *Layout = TD->getStructLayout(CS->getType()); + uint64_t SizeSoFar = 0; + for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { + const Constant *Field = CS->getOperand(i); + + // Check if padding is needed and insert one or more 0s. + uint64_t FieldSize = TD->getTypeAllocSize(Field->getType()); + uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) + - Layout->getElementOffset(i)) - FieldSize; + SizeSoFar += FieldSize + PadSize; + + // Now print the actual field value. + emitGlobalConstantImpl(Field, AddrSpace, AP); + + // Insert padding - this may include padding to increase the size of the + // current field up to the ABI size (if the struct is not packed) as well + // as padding to ensure that the next field starts at the right offset. 
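+    // Worked example (illustrative): for { i8, i32 } with a 4-byte-aligned
+    // i32, field 0 has FieldSize 1 and field 1 starts at offset 4, so
+    // PadSize is 3 and three zero bytes follow the i8.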
+ AP.OutStreamer.EmitZeros(PadSize, AddrSpace); + } + assert(SizeSoFar == Layout->getSizeInBytes() && + "Layout of constant struct may be incorrect!"); +} + +static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, + AsmPrinter &AP) { + APInt API = CFP->getValueAPF().bitcastToAPInt(); + + // First print a comment with what we think the original floating-point value + // should have been. + if (AP.isVerbose()) { + SmallString<8> StrVal; + CFP->getValueAPF().toString(StrVal); + + CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n'; + } + + // Now iterate through the APInt chunks, emitting them in endian-correct + // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit + // floats). + unsigned NumBytes = API.getBitWidth() / 8; + unsigned TrailingBytes = NumBytes % sizeof(uint64_t); + const uint64_t *p = API.getRawData(); + + // PPC's long double has odd notions of endianness compared to how LLVM + // handles it: p[0] goes first for *big* endian on PPC. + if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) { + int Chunk = API.getNumWords() - 1; + + if (TrailingBytes) + AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace); + + for (; Chunk >= 0; --Chunk) + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + } else { + unsigned Chunk; + for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + + if (TrailingBytes) + AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace); + } + + // Emit the tail padding for the long double. + const DataLayout &TD = *AP.TM.getDataLayout(); + AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - + TD.getTypeStoreSize(CFP->getType()), AddrSpace); +} + +static void emitGlobalConstantLargeInt(const ConstantInt *CI, + unsigned AddrSpace, AsmPrinter &AP) { + const DataLayout *TD = AP.TM.getDataLayout(); + unsigned BitWidth = CI->getBitWidth(); + assert((BitWidth & 63) == 0 && "only support multiples of 64-bits"); + + // We don't expect assemblers to support integer data directives + // for more than 64 bits, so we emit the data in at most 64-bit + // quantities at a time. + const uint64_t *RawData = CI->getValue().getRawData(); + for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { + uint64_t Val = TD->isBigEndian() ? 
RawData[e - i - 1] : RawData[i];
+    AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+  }
+}
+
+static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+                                   AsmPrinter &AP) {
+  const DataLayout *TD = AP.TM.getDataLayout();
+  uint64_t Size = TD->getTypeAllocSize(CV->getType());
+  if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
+    return AP.OutStreamer.EmitZeros(Size, AddrSpace);
+
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    switch (Size) {
+    case 1:
+    case 2:
+    case 4:
+    case 8:
+      if (AP.isVerbose())
+        AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+                                                CI->getZExtValue());
+      AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+      return;
+    default:
+      emitGlobalConstantLargeInt(CI, AddrSpace, AP);
+      return;
+    }
+  }
+
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+    return emitGlobalConstantFP(CFP, AddrSpace, AP);
+
+  if (isa<ConstantPointerNull>(CV)) {
+    AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
+    return;
+  }
+
+  if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
+    return emitGlobalConstantDataSequential(CDS, AddrSpace, AP);
+
+  if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+    return emitGlobalConstantArray(CVA, AddrSpace, AP);
+
+  if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+    return emitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
+    // vectors).
+    if (CE->getOpcode() == Instruction::BitCast)
+      return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
+
+    if (Size > 8) {
+      // If the constant expression's size is greater than 64-bits, then we have
+      // to emit the value in chunks. Try to constant fold the value and emit it
+      // that way.
+      Constant *New = ConstantFoldConstantExpression(CE, TD);
+      if (New && New != CE)
+        return emitGlobalConstantImpl(New, AddrSpace, AP);
+    }
+  }
+
+  if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+    return emitGlobalConstantVector(V, AddrSpace, AP);
+
+  // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
+  // through the streamer with EmitValue.
+  AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+  uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+  if (Size)
+    emitGlobalConstantImpl(CV, AddrSpace, *this);
+  else if (MAI->hasSubsectionsViaSymbols()) {
+    // If the global has zero size, emit a single byte so that two labels don't
+    // look like they are at the same location.
+    OutStreamer.EmitIntValue(0, 1, AddrSpace);
+  }
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+  // Target doesn't support this yet!
+  llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+  if (Offset > 0)
+    OS << '+' << Offset;
+  else if (Offset < 0)
+    OS << Offset;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
+/// temporary label with the specified stem and unique ID.
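+/// For instance (illustrative, assuming a private prefix of "L" as on
+/// Darwin): GetTempSymbol("set", 3) names the assembler-local symbol Lset3.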
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const { + return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + + Name + Twine(ID)); +} + +/// GetTempSymbol - Return an assembler temporary label with the specified +/// stem. +MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const { + return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+ + Name); +} + + +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { + return MMI->getAddrLabelSymbol(BA->getBasicBlock()); +} + +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { + return MMI->getAddrLabelSymbol(BB); +} + +/// GetCPISymbol - Return the symbol for the specified constant pool entry. +MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { + return OutContext.GetOrCreateSymbol + (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + + "_" + Twine(CPID)); +} + +/// GetJTISymbol - Return the symbol for the specified jump table entry. +MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { + return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate); +} + +/// GetJTSetSymbol - Return the symbol for the specified jump table .set +/// FIXME: privatize to AsmPrinter. +MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { + return OutContext.GetOrCreateSymbol + (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + + Twine(UID) + "_set_" + Twine(MBBID)); +} + +/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with +/// global value name as its base, with the specified suffix, and where the +/// symbol is forced to have private linkage if ForcePrivate is true. +MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV, + StringRef Suffix, + bool ForcePrivate) const { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, GV, ForcePrivate); + NameStr.append(Suffix.begin(), Suffix.end()); + return OutContext.GetOrCreateSymbol(NameStr.str()); +} + +/// GetExternalSymbolSymbol - Return the MCSymbol for the specified +/// ExternalSymbol. +MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, Sym); + return OutContext.GetOrCreateSymbol(NameStr.str()); +} + + + +/// PrintParentLoopComment - Print comments about parent loops of this one. +static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop, + unsigned FunctionNumber) { + if (Loop == 0) return; + PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber); + OS.indent(Loop->getLoopDepth()*2) + << "Parent Loop BB" << FunctionNumber << "_" + << Loop->getHeader()->getNumber() + << " Depth=" << Loop->getLoopDepth() << '\n'; +} + + +/// PrintChildLoopComment - Print comments about child loops within +/// the loop for this basic block, with nesting. +static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, + unsigned FunctionNumber) { + // Add child loop information + for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();CL != E; ++CL){ + OS.indent((*CL)->getLoopDepth()*2) + << "Child Loop BB" << FunctionNumber << "_" + << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth() + << '\n'; + PrintChildLoopComment(OS, *CL, FunctionNumber); + } +} + +/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks. 
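+/// For a header of an innermost loop at depth 1, the emitted comment looks
+/// like "=>This Inner Loop Header: Depth=1" (illustrative rendering).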
+static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+                                       const MachineLoopInfo *LI,
+                                       const AsmPrinter &AP) {
+  // Add loop depth information
+  const MachineLoop *Loop = LI->getLoopFor(&MBB);
+  if (Loop == 0) return;
+
+  MachineBasicBlock *Header = Loop->getHeader();
+  assert(Header && "No header for loop");
+
+  // If this block is not a loop header, just print out which block is the
+  // loop header and return.
+  if (Header != &MBB) {
+    AP.OutStreamer.AddComment(" in Loop: Header=BB" +
+                              Twine(AP.getFunctionNumber())+"_" +
+                              Twine(Loop->getHeader()->getNumber())+
+                              " Depth="+Twine(Loop->getLoopDepth()));
+    return;
+  }
+
+  // Otherwise, it is a loop header. Print out information about child and
+  // parent loops.
+  raw_ostream &OS = AP.OutStreamer.GetCommentOS();
+
+  PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+  OS << "=>";
+  OS.indent(Loop->getLoopDepth()*2-2);
+
+  OS << "This ";
+  if (Loop->empty())
+    OS << "Inner ";
+  OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+  PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
+
+
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+  // Emit an alignment directive for this block, if needed.
+  if (unsigned Align = MBB->getAlignment())
+    EmitAlignment(Align);
+
+  // If the block has its address taken, emit any labels that were used to
+  // reference the block. It is possible that there is more than one label
+  // here, because multiple LLVM BB's may have been RAUW'd to this block after
+  // the references were generated.
+  if (MBB->hasAddressTaken()) {
+    const BasicBlock *BB = MBB->getBasicBlock();
+    if (isVerbose())
+      OutStreamer.AddComment("Block address taken");
+
+    std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
+
+    for (unsigned i = 0, e = Syms.size(); i != e; ++i)
+      OutStreamer.EmitLabel(Syms[i]);
+  }
+
+  // Print some verbose block comments.
+  if (isVerbose()) {
+    if (const BasicBlock *BB = MBB->getBasicBlock())
+      if (BB->hasName())
+        OutStreamer.AddComment("%" + BB->getName());
+    emitBasicBlockLoopComments(*MBB, LI, *this);
+  }
+
+  // Print the main label for the block.
+  if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
+    if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+      // NOTE: Want this comment at start of line, don't emit with AddComment.
+      OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
+                              Twine(MBB->getNumber()) + ":");
+    }
+  } else {
+    OutStreamer.EmitLabel(MBB->getSymbol());
+  }
+}
+
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+                                bool IsDefinition) const {
+  MCSymbolAttr Attr = MCSA_Invalid;
+
+  switch (Visibility) {
+  default: break;
+  case GlobalValue::HiddenVisibility:
+    if (IsDefinition)
+      Attr = MAI->getHiddenVisibilityAttr();
+    else
+      Attr = MAI->getHiddenDeclarationVisibilityAttr();
+    break;
+  case GlobalValue::ProtectedVisibility:
+    Attr = MAI->getProtectedVisibilityAttr();
+    break;
+  }
+
+  if (Attr != MCSA_Invalid)
+    OutStreamer.EmitSymbolAttribute(Sym, Attr);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
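+/// Illustrative example (not part of the original comment): if BB0 ends in
+/// "je BB2" and BB1 is laid out immediately after BB0 with BB0 as its only
+/// predecessor, BB1 is reachable only by fall-through and needs no label.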
+bool AsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isLandingPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Check the terminators in the previous blocks + for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(), + IE = Pred->end(); II != IE; ++II) { + MachineInstr &MI = *II; + + // If it is not a simple branch, we are in a table somewhere. + if (!MI.isBranch() || MI.isIndirectBranch()) + return false; + + // If we are the operands of one of the branches, this is not + // a fall through. + for (MachineInstr::mop_iterator OI = MI.operands_begin(), + OE = MI.operands_end(); OI != OE; ++OI) { + const MachineOperand& OP = *OI; + if (OP.isJTI()) + return false; + if (OP.isMBB() && OP.getMBB() == MBB) + return false; + } + } + + return true; +} + + + +GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { + if (!S->usesMetadata()) + return 0; + + gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); + gcp_map_type::iterator GCPI = GCMap.find(S); + if (GCPI != GCMap.end()) + return GCPI->second; + + const char *Name = S->getName().c_str(); + + for (GCMetadataPrinterRegistry::iterator + I = GCMetadataPrinterRegistry::begin(), + E = GCMetadataPrinterRegistry::end(); I != E; ++I) + if (strcmp(Name, I->getName()) == 0) { + GCMetadataPrinter *GMP = I->instantiate(); + GMP->S = S; + GCMap.insert(std::make_pair(S, GMP)); + return GMP; + } + + report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp new file mode 100644 index 0000000..156acac --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -0,0 +1,196 @@ +//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Dwarf emissions parts of AsmPrinter. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Dwarf Emission Helper Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
+  if (isVerbose() && Desc)
+    OutStreamer.AddComment(Desc);
+
+  OutStreamer.EmitSLEB128IntValue(Value);
+}
+
+/// EmitULEB128 - emit the specified unsigned leb128 value.
+void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
+                             unsigned PadTo) const {
+  if (isVerbose() && Desc)
+    OutStreamer.AddComment(Desc);
+
+  OutStreamer.EmitULEB128IntValue(Value, PadTo);
+}
+
+/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+void AsmPrinter::EmitCFAByte(unsigned Val) const {
+  if (isVerbose()) {
+    if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+      OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+                             Twine(Val-dwarf::DW_CFA_offset) + ")");
+    else
+      OutStreamer.AddComment(dwarf::CallFrameString(Val));
+  }
+  OutStreamer.EmitIntValue(Val, 1);
+}
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+  switch (Encoding) {
+  case dwarf::DW_EH_PE_absptr: return "absptr";
+  case dwarf::DW_EH_PE_omit: return "omit";
+  case dwarf::DW_EH_PE_pcrel: return "pcrel";
+  case dwarf::DW_EH_PE_udata4: return "udata4";
+  case dwarf::DW_EH_PE_udata8: return "udata8";
+  case dwarf::DW_EH_PE_sdata4: return "sdata4";
+  case dwarf::DW_EH_PE_sdata8: return "sdata8";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+    return "indirect pcrel udata4";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+    return "indirect pcrel sdata4";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+    return "indirect pcrel udata8";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+    return "indirect pcrel sdata8";
+  }
+
+  return "<unknown encoding>";
+}
+
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding.  If verbose assembly output is enabled, we output comments
+/// describing the encoding.  Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
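+/// For example (illustrative): Val = DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b)
+/// with Desc "LSDA" prints the comment "LSDA Encoding = pcrel sdata4" ahead
+/// of the byte.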
+void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { + if (isVerbose()) { + if (Desc != 0) + OutStreamer.AddComment(Twine(Desc)+" Encoding = " + + Twine(DecodeDWARFEncoding(Val))); + else + OutStreamer.AddComment(Twine("Encoding = ") + + DecodeDWARFEncoding(Val)); + } + + OutStreamer.EmitIntValue(Val, 1); +} + +/// GetSizeOfEncodedValue - Return the size of the encoding in bytes. +unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + default: llvm_unreachable("Invalid encoded value."); + case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(); + case dwarf::DW_EH_PE_udata2: return 2; + case dwarf::DW_EH_PE_udata4: return 4; + case dwarf::DW_EH_PE_udata8: return 8; + } +} + +void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, + unsigned Encoding) const { + if (GV) { + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + + const MCExpr *Exp = + TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer); + OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); + } else + OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); +} + +/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its +/// section. This can be done with a special directive if the target supports +/// it (e.g. cygwin) or by emitting it as an offset from a label at the start +/// of the section. +/// +/// SectionLabel is a temporary label emitted at the start of the section that +/// Label lives in. +void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, + const MCSymbol *SectionLabel) const { + // On COFF targets, we have to emit the special .secrel32 directive. + if (MAI->getDwarfSectionOffsetDirective()) { + OutStreamer.EmitCOFFSecRel32(Label); + return; + } + + // Get the section that we're referring to, based on SectionLabel. + const MCSection &Section = SectionLabel->getSection(); + + // If Label has already been emitted, verify that it is in the same section as + // section label for sanity. + assert((!Label->isInSection() || &Label->getSection() == &Section) && + "Section offset using wrong section base for label"); + + // If the section in question will end up with an address of 0 anyway, we can + // just emit an absolute reference to save a relocation. + if (Section.isBaseAddressKnownZero()) { + OutStreamer.EmitSymbolValue(Label, 4); + return; + } + + // Otherwise, emit it as a label difference from the start of the section. + EmitLabelDifference(Label, SectionLabel, 4); +} + +//===----------------------------------------------------------------------===// +// Dwarf Lowering Routines +//===----------------------------------------------------------------------===// + +/// EmitCFIFrameMove - Emit a frame instruction. +void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); + + // If advancing cfa. 
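+  // Illustrative mapping (an assumption, not from the original source): a
+  // prologue "subq $24, %rsp" is typically recorded as a move with
+  // Src == (VirtualFP, -24), which lowers to ".cfi_def_cfa_offset 24" below.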
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (Src.getReg() == MachineLocation::VirtualFP) { + OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); + } else { + // Reg + Offset + OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true), + Src.getOffset()); + } + } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); + } else { + assert(!Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), + Dst.getOffset()); + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp new file mode 100644 index 0000000..abfa330 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -0,0 +1,553 @@ +//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the inline assembler pieces of the AsmPrinter class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +namespace { + struct SrcMgrDiagInfo { + const MDNode *LocInfo; + LLVMContext::InlineAsmDiagHandlerTy DiagHandler; + void *DiagContext; + }; +} + +/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an +/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo +/// struct above. +static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { + SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo); + assert(DiagInfo && "Diagnostic context not passed down?"); + + // If the inline asm had metadata associated with it, pull out a location + // cookie corresponding to which line the error occurred on. + unsigned LocCookie = 0; + if (const MDNode *LocInfo = DiagInfo->LocInfo) { + unsigned ErrorLine = Diag.getLineNo()-1; + if (ErrorLine >= LocInfo->getNumOperands()) + ErrorLine = 0; + + if (LocInfo->getNumOperands() != 0) + if (const ConstantInt *CI = + dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine))) + LocCookie = CI->getZExtValue(); + } + + DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie); +} + +/// EmitInlineAsm - Emit a blob of inline asm to the output streamer. 
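+/// When the streamer writes a textual .s file the blob is forwarded
+/// verbatim; when it writes an object file the blob is re-parsed through the
+/// target's asm parser (a summary of the logic below, not an addition to it).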
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
+                               InlineAsm::AsmDialect Dialect) const {
+  assert(!Str.empty() && "Can't emit empty inline asm block");
+
+  // Remember if the buffer is nul terminated or not so we can avoid a copy.
+  bool isNullTerminated = Str.back() == 0;
+  if (isNullTerminated)
+    Str = Str.substr(0, Str.size()-1);
+
+  // If the output streamer is actually a .s file, just emit the blob textually.
+  // This is useful in case the asm parser doesn't handle something but the
+  // system assembler does.
+  if (OutStreamer.hasRawTextSupport()) {
+    OutStreamer.EmitRawText(Str);
+    return;
+  }
+
+  SourceMgr SrcMgr;
+  SrcMgrDiagInfo DiagInfo;
+
+  // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
+  LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+  bool HasDiagHandler = false;
+  if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+    // If the source manager has an issue, we arrange for srcMgrDiagHandler
+    // to be invoked, getting DiagInfo passed into it.
+    DiagInfo.LocInfo = LocMDNode;
+    DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+    DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+    SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
+    HasDiagHandler = true;
+  }
+
+  MemoryBuffer *Buffer;
+  if (isNullTerminated)
+    Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
+  else
+    Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+
+  // Tell SrcMgr about this buffer; it takes ownership of the buffer.
+  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+  OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr,
+                                                  OutContext, OutStreamer,
+                                                  *MAI));
+
+  // FIXME: It would be nice if we can avoid creating a new instance of
+  // MCSubtargetInfo here given TargetSubtargetInfo is available. However,
+  // we have to watch out for asm directives which can change subtarget
+  // state. e.g. .code 16, .code 32.
+  OwningPtr<MCSubtargetInfo>
+    STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
+                                             TM.getTargetCPU(),
+                                             TM.getTargetFeatureString()));
+  OwningPtr<MCTargetAsmParser>
+    TAP(TM.getTarget().createMCAsmParser(*STI, *Parser));
+  if (!TAP)
+    report_fatal_error("Inline asm not supported by this streamer because"
+                       " we don't have an asm parser for this target\n");
+  Parser->setAssemblerDialect(Dialect);
+  Parser->setTargetParser(*TAP.get());
+
+  // Don't implicitly switch to the text section before the asm.
+  int Res = Parser->Run(/*NoInitialTextSection*/ true,
+                        /*NoFinalize*/ true);
+  if (Res && !HasDiagHandler)
+    report_fatal_error("Error parsing inline asm\n");
+}
+
+static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+                               MachineModuleInfo *MMI, int InlineAsmVariant,
+                               AsmPrinter *AP, unsigned LocCookie,
+                               raw_ostream &OS) {
+  // Switch to the inline assembly variant.
+  OS << "\t.intel_syntax\n\t";
+
+  const char *LastEmitted = AsmStr; // One past the last character emitted.
+  unsigned NumOperands = MI->getNumOperands();
+
+  while (*LastEmitted) {
+    switch (*LastEmitted) {
+    default: {
+      // Not a special case, emit the string section literally.
+      const char *LiteralEnd = LastEmitted+1;
+      while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+             *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+        ++LiteralEnd;
+
+      OS.write(LastEmitted, LiteralEnd-LastEmitted);
+      LastEmitted = LiteralEnd;
+      break;
+    }
+    case '\n':
+      ++LastEmitted;   // Consume newline character.
+      OS << '\n';      // Indent code with newline.
+ break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; + + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': + ++LastEmitted; // Consume second '$' character. + break; + } + if (Done) break; + + const char *IDStart = LastEmitted; + const char *IDEnd = IDStart; + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + + unsigned Val; + if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + LastEmitted = IDEnd; + + if (Val >= NumOperands-1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + + // Okay, we finally have a value number. Ask the target to print this + // operand! + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + + bool Error = false; + + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. + + if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, + /*Modifier*/ 0, OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, + /*Modifier*/ 0, OS); + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + } + break; + } + } + } + OS << "\n\t.att_syntax\n" << (char)0; // null terminate string. +} + +static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, int InlineAsmVariant, + int AsmPrinterVariant, AsmPrinter *AP, + unsigned LocCookie, raw_ostream &OS) { + int CurVariant = -1; // The number of the {.|.|.} region we are in. + const char *LastEmitted = AsmStr; // One past the last character emitted. + unsigned NumOperands = MI->getNumOperands(); + + OS << '\t'; + + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. + const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + OS << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; + + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': // $$ -> $ + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS << '$'; + ++LastEmitted; // Consume second '$' character. + break; + case '(': // $( -> same as GCC's { character. + ++LastEmitted; // Consume '(' character. 
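+        // Illustrative example (not from the original source): the string
+        // "$(ld$|ldw$)" emits "ld" for assembler variant 0 and "ldw" for
+        // variant 1; opening another "$(" before "$)" is rejected below.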
+ if (CurVariant != -1) + report_fatal_error("Nested variants found in inline asm string: '" + + Twine(AsmStr) + "'"); + CurVariant = 0; // We're in the first variant now. + break; + case '|': + ++LastEmitted; // consume '|' character. + if (CurVariant == -1) + OS << '|'; // this is gcc's behavior for | outside a variant + else + ++CurVariant; // We're in the next variant. + break; + case ')': // $) -> same as GCC's } char. + ++LastEmitted; // consume ')' character. + if (CurVariant == -1) + OS << '}'; // this is gcc's behavior for } outside a variant + else + CurVariant = -1; + break; + } + if (Done) break; + + bool HasCurlyBraces = false; + if (*LastEmitted == '{') { // ${variable} + ++LastEmitted; // Consume '{' character. + HasCurlyBraces = true; + } + + // If we have ${:foo}, then this is not a real operand reference, it is a + // "magic" string reference, just like in .td files. Arrange to call + // PrintSpecial. + if (HasCurlyBraces && *LastEmitted == ':') { + ++LastEmitted; + const char *StrStart = LastEmitted; + const char *StrEnd = strchr(StrStart, '}'); + if (StrEnd == 0) + report_fatal_error("Unterminated ${:foo} operand in inline asm" + " string: '" + Twine(AsmStr) + "'"); + + std::string Val(StrStart, StrEnd); + AP->PrintSpecial(MI, OS, Val.c_str()); + LastEmitted = StrEnd+1; + break; + } + + const char *IDStart = LastEmitted; + const char *IDEnd = IDStart; + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + + unsigned Val; + if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + LastEmitted = IDEnd; + + char Modifier[2] = { 0, 0 }; + + if (HasCurlyBraces) { + // If we have curly braces, check for a modifier character. This + // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. + if (*LastEmitted == ':') { + ++LastEmitted; // Consume ':' character. + if (*LastEmitted == 0) + report_fatal_error("Bad ${:} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + + Modifier[0] = *LastEmitted; + ++LastEmitted; // Consume modifier character. + } + + if (*LastEmitted != '}') + report_fatal_error("Bad ${} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + ++LastEmitted; // Consume '}' character. + } + + if (Val >= NumOperands-1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + + // Okay, we finally have a value number. Ask the target to print this + // operand! + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + + bool Error = false; + + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. + + if (Modifier[0] == 'l') // labels are target independent + // FIXME: What if the operand isn't an MBB, report error? + OS << *MI->getOperand(OpNo).getMBB()->getSymbol(); + else { + if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, + Modifier[0] ? 
Modifier : 0, + OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, + Modifier[0] ? Modifier : 0, OS); + } + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + } + } + break; + } + } + } + OS << '\n' << (char)0; // null terminate string. +} + +/// EmitInlineAsm - This method formats and emits the specified machine +/// instruction that is an inline asm. +void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { + assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + + // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + + // If this asmstr is empty, just print the #APP/#NOAPP markers. + // These are useful to see where empty asm's wound up. + if (AsmStr[0] == 0) { + // Don't emit the comments if writing to a .o file. + if (!OutStreamer.hasRawTextSupport()) return; + + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmStart()); + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmEnd()); + return; + } + + // Emit the #APP start marker. This has to happen even if verbose-asm isn't + // enabled, so we use EmitRawText. + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmStart()); + + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + const MDNode *LocMD = 0; + for (unsigned i = MI->getNumOperands(); i != 0; --i) { + if (MI->getOperand(i-1).isMetadata() && + (LocMD = MI->getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + } + + // Emit the inline asm to a temporary string so we can emit it through + // EmitInlineAsm. + SmallString<256> StringData; + raw_svector_ostream OS(StringData); + + // The variant of the current asmprinter. + int AsmPrinterVariant = MAI->getAssemblerDialect(); + InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect(); + AsmPrinter *AP = const_cast<AsmPrinter*>(this); + if (InlineAsmVariant == InlineAsm::AD_ATT) + EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant, + AP, LocCookie, OS); + else + EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS); + + EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); + + // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't + // enabled, so we use EmitRawText. + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ + MAI->getInlineAsmEnd()); +} + + +/// PrintSpecial - Print information related to the specified machine instr +/// that is independent of the operand, and may be independent of the instr +/// itself. This can be useful for portably encoding the comment character +/// or other bits of target-specific knowledge into the asmstrings. The +/// syntax used is ${:comment}. 
Targets can override this to add support +/// for their own strange codes. +void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, + const char *Code) const { + if (!strcmp(Code, "private")) { + OS << MAI->getPrivateGlobalPrefix(); + } else if (!strcmp(Code, "comment")) { + OS << MAI->getCommentString(); + } else if (!strcmp(Code, "uid")) { + // Comparing the address of MI isn't sufficient, because machineinstrs may + // be allocated to the same address across functions. + + // If this is a new LastFn instruction, bump the counter. + if (LastMI != MI || LastFn != getFunctionNumber()) { + ++Counter; + LastMI = MI; + LastFn = getFunctionNumber(); + } + OS << Counter; + } else { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Unknown special formatter '" << Code + << "' for machine instr: " << *MI; + report_fatal_error(Msg.str()); + } +} + +/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM +/// instruction, using the specified assembler variant. Targets should +/// override this to format as appropriate. +bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + const MachineOperand &MO = MI->getOperand(OpNo); + switch (ExtraCode[0]) { + default: + return true; // Unknown modifier. + case 'c': // Substitute immediate value without immediate syntax + if (MO.getType() != MachineOperand::MO_Immediate) + return true; + O << MO.getImm(); + return false; + case 'n': // Negate the immediate constant. + if (MO.getType() != MachineOperand::MO_Immediate) + return true; + O << -MO.getImm(); + return false; + } + } + return true; +} + +bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + // Target doesn't support this yet! + return true; +} + diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp new file mode 100644 index 0000000..57e0acd --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -0,0 +1,368 @@ +//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Data structures for DWARF info entries. +// +//===----------------------------------------------------------------------===// + +#include "DIE.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// DIEAbbrevData Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. 
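+/// (Illustrative note, not in the original comment: two abbreviation entries
+/// with the same attribute/form pair produce identical profiles, so the
+/// FoldingSet uniques them.)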
+/// +void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(Attribute); + ID.AddInteger(Form); +} + +//===----------------------------------------------------------------------===// +// DIEAbbrev Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(Tag); + ID.AddInteger(ChildrenFlag); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) + Data[i].Profile(ID); +} + +/// Emit - Print the abbreviation using the specified asm printer. +/// +void DIEAbbrev::Emit(AsmPrinter *AP) const { + // Emit its Dwarf tag type. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(Tag, dwarf::TagString(Tag)); + + // Emit whether it has children DIEs. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag)); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + const DIEAbbrevData &AttrData = Data[i]; + + // Emit attribute type. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(AttrData.getAttribute(), + dwarf::AttributeString(AttrData.getAttribute())); + + // Emit form type. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(AttrData.getForm(), + dwarf::FormEncodingString(AttrData.getForm())); + } + + // Mark end of abbreviation. + AP->EmitULEB128(0, "EOM(1)"); + AP->EmitULEB128(0, "EOM(2)"); +} + +#ifndef NDEBUG +void DIEAbbrev::print(raw_ostream &O) { + O << "Abbreviation @" + << format("0x%lx", (long)(intptr_t)this) + << " " + << dwarf::TagString(Tag) + << " " + << dwarf::ChildrenString(ChildrenFlag) + << '\n'; + + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + O << " " + << dwarf::AttributeString(Data[i].getAttribute()) + << " " + << dwarf::FormEncodingString(Data[i].getForm()) + << '\n'; + } +} +void DIEAbbrev::dump() { print(dbgs()); } +#endif + +//===----------------------------------------------------------------------===// +// DIE Implementation +//===----------------------------------------------------------------------===// + +DIE::~DIE() { + for (unsigned i = 0, N = Children.size(); i < N; ++i) + delete Children[i]; +} + +/// Climb up the parent chain to get the compile unit DIE this DIE belongs to. 
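+/// For a chain compile_unit -> subprogram -> variable (illustrative), calling
+/// this on the variable DIE walks two parents and returns the compile unit.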
+DIE *DIE::getCompileUnit() const{ + DIE *p = getParent(); + while (p) { + if (p->getTag() == dwarf::DW_TAG_compile_unit) + return p; + p = p->getParent(); + } + llvm_unreachable("We should not have orphaned DIEs."); +} + +#ifndef NDEBUG +void DIE::print(raw_ostream &O, unsigned IncIndent) { + IndentCount += IncIndent; + const std::string Indent(IndentCount, ' '); + bool isBlock = Abbrev.getTag() == 0; + + if (!isBlock) { + O << Indent + << "Die: " + << format("0x%lx", (long)(intptr_t)this) + << ", Offset: " << Offset + << ", Size: " << Size << "\n"; + + O << Indent + << dwarf::TagString(Abbrev.getTag()) + << " " + << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n"; + } else { + O << "Size: " << Size << "\n"; + } + + const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData(); + + IndentCount += 2; + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + O << Indent; + + if (!isBlock) + O << dwarf::AttributeString(Data[i].getAttribute()); + else + O << "Blk[" << i << "]"; + + O << " " + << dwarf::FormEncodingString(Data[i].getForm()) + << " "; + Values[i]->print(O); + O << "\n"; + } + IndentCount -= 2; + + for (unsigned j = 0, M = Children.size(); j < M; ++j) { + Children[j]->print(O, 4); + } + + if (!isBlock) O << "\n"; + IndentCount -= IncIndent; +} + +void DIE::dump() { + print(dbgs()); +} +#endif + +void DIEValue::anchor() { } + +#ifndef NDEBUG +void DIEValue::dump() { + print(dbgs()); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEInteger Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit integer of appropriate size. +/// +void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { + unsigned Size = ~0U; + switch (Form) { + case dwarf::DW_FORM_flag_present: + // Emit something to keep the lines and comments in sync. + // FIXME: Is there a better way to do this? + if (Asm->OutStreamer.hasRawTextSupport()) + Asm->OutStreamer.EmitRawText(StringRef("")); + return; + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: Size = 1; break; + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: Size = 2; break; + case dwarf::DW_FORM_sec_offset: // Fall thru + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: Size = 4; break; + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_data8: Size = 8; break; + case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return; + case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return; + case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; + case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; + case dwarf::DW_FORM_addr: + Size = Asm->getDataLayout().getPointerSize(); break; + default: llvm_unreachable("DIE Value form not supported yet"); + } + Asm->OutStreamer.EmitIntValue(Integer, Size); +} + +/// SizeOf - Determine size of integer value in bytes. 
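+/// For example (illustrative): DW_FORM_data2 always occupies two bytes, while
+/// DW_FORM_udata takes as many ULEB128 bytes as Integer needs, e.g. a single
+/// byte for values below 128.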
+/// +unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { + switch (Form) { + case dwarf::DW_FORM_flag_present: return 0; + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: return sizeof(int8_t); + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: return sizeof(int16_t); + case dwarf::DW_FORM_sec_offset: // Fall thru + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: return sizeof(int32_t); + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_data8: return sizeof(int64_t); + case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); + default: llvm_unreachable("DIE Value form not supported yet"); + } +} + +#ifndef NDEBUG +void DIEInteger::print(raw_ostream &O) { + O << "Int: " << (int64_t)Integer << " 0x"; + O.write_hex(Integer); +} +#endif + +//===----------------------------------------------------------------------===// +// DIELabel Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->getDataLayout().getPointerSize(); +} + +#ifndef NDEBUG +void DIELabel::print(raw_ostream &O) { + O << "Lbl: " << Label->getName(); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEDelta Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit delta value. +/// +void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->getDataLayout().getPointerSize(); +} + +#ifndef NDEBUG +void DIEDelta::print(raw_ostream &O) { + O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEEntry Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit debug information entry offset. +/// +void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->EmitInt32(Entry->getOffset()); +} + +#ifndef NDEBUG +void DIEEntry::print(raw_ostream &O) { + O << format("Die: 0x%lx", (long)(intptr_t)Entry); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEBlock Implementation +//===----------------------------------------------------------------------===// + +/// ComputeSize - calculate the size of the block. 
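+/// The size is the sum of SizeOf() over the block's constituent values; it is
+/// computed once and cached in Size for subsequent queries.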
+/// +unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { + if (!Size) { + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + for (unsigned i = 0, N = Values.size(); i < N; ++i) + Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm()); + } + + return Size; +} + +/// EmitValue - Emit block data. +/// +void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { + switch (Form) { + default: llvm_unreachable("Improper form for block"); + case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; + case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; + } + + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + for (unsigned i = 0, N = Values.size(); i < N; ++i) + Values[i]->EmitValue(Asm, AbbrevData[i].getForm()); +} + +/// SizeOf - Determine size of block data in bytes. +/// +unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const { + switch (Form) { + case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); + case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); + case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); + case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size); + default: llvm_unreachable("Improper form for block"); + } +} + +#ifndef NDEBUG +void DIEBlock::print(raw_ostream &O) { + O << "Blk: "; + DIE::print(O, 5); +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h new file mode 100644 index 0000000..c332aa2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h @@ -0,0 +1,392 @@ +//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Data structures for DWARF info entries. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DIE_H__ +#define CODEGEN_ASMPRINTER_DIE_H__ + +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Dwarf.h" +#include <vector> + +namespace llvm { + class AsmPrinter; + class MCSymbol; + class raw_ostream; + + //===--------------------------------------------------------------------===// + /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a + /// Dwarf abbreviation. + class DIEAbbrevData { + /// Attribute - Dwarf attribute code. + /// + uint16_t Attribute; + + /// Form - Dwarf form code. + /// + uint16_t Form; + public: + DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {} + + // Accessors. + uint16_t getAttribute() const { return Attribute; } + uint16_t getForm() const { return Form; } + + /// Profile - Used to gather unique data for the abbreviation folding set. + /// + void Profile(FoldingSetNodeID &ID) const; + }; + + //===--------------------------------------------------------------------===// + /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug + /// information object. + class DIEAbbrev : public FoldingSetNode { + /// Tag - Dwarf tag code. + /// + uint16_t Tag; + + /// ChildrenFlag - Dwarf children flag. + /// + uint16_t ChildrenFlag; + + /// Unique number for node. 
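+    /// Assigned via setNumber() once the abbreviation has been uniqued; this
+    /// is the abbreviation code that each DIE using this abbreviation emits
+    /// ahead of its attribute values.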
+ /// + unsigned Number; + + /// Data - Raw data bytes for abbreviation. + /// + SmallVector<DIEAbbrevData, 12> Data; + + public: + DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} + + // Accessors. + uint16_t getTag() const { return Tag; } + unsigned getNumber() const { return Number; } + uint16_t getChildrenFlag() const { return ChildrenFlag; } + const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } + void setTag(uint16_t T) { Tag = T; } + void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } + void setNumber(unsigned N) { Number = N; } + + /// AddAttribute - Adds another set of attribute information to the + /// abbreviation. + void AddAttribute(uint16_t Attribute, uint16_t Form) { + Data.push_back(DIEAbbrevData(Attribute, Form)); + } + + /// AddFirstAttribute - Adds a set of attribute information to the front + /// of the abbreviation. + void AddFirstAttribute(uint16_t Attribute, uint16_t Form) { + Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form)); + } + + /// Profile - Used to gather unique data for the abbreviation folding set. + /// + void Profile(FoldingSetNodeID &ID) const; + + /// Emit - Print the abbreviation using the specified asm printer. + /// + void Emit(AsmPrinter *AP) const; + +#ifndef NDEBUG + void print(raw_ostream &O); + void dump(); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIE - A structured debug information entry. Has an abbreviation which + /// describes its organization. + class DIEValue; + + class DIE { + protected: + /// Offset - Offset in debug info section. + /// + unsigned Offset; + + /// Size - Size of instance + children. + /// + unsigned Size; + + /// Abbrev - Buffer for constructing abbreviation. + /// + DIEAbbrev Abbrev; + + /// Children DIEs. + /// + std::vector<DIE *> Children; + + DIE *Parent; + + /// Attribute values. + /// + SmallVector<DIEValue*, 12> Values; + + // Private data for print() + mutable unsigned IndentCount; + public: + explicit DIE(unsigned Tag) + : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0), + IndentCount(0) {} + virtual ~DIE(); + + // Accessors. + DIEAbbrev &getAbbrev() { return Abbrev; } + unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } + unsigned getTag() const { return Abbrev.getTag(); } + unsigned getOffset() const { return Offset; } + unsigned getSize() const { return Size; } + const std::vector<DIE *> &getChildren() const { return Children; } + const SmallVectorImpl<DIEValue*> &getValues() const { return Values; } + DIE *getParent() const { return Parent; } + /// Climb up the parent chain to get the compile unit DIE this DIE belongs + /// to. + DIE *getCompileUnit() const; + void setTag(unsigned Tag) { Abbrev.setTag(Tag); } + void setOffset(unsigned O) { Offset = O; } + void setSize(unsigned S) { Size = S; } + + /// addValue - Add a value and attributes to a DIE. + /// + void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) { + Abbrev.AddAttribute(Attribute, Form); + Values.push_back(Value); + } + + /// addChild - Add a child to the DIE. 
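+    /// Adding a child sets the abbreviation's children flag to
+    /// DW_CHILDREN_yes; re-adding a child that already has this parent is a
+    /// no-op.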
+ /// + void addChild(DIE *Child) { + if (Child->getParent()) { + assert (Child->getParent() == this && "Unexpected DIE Parent!"); + return; + } + Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); + Children.push_back(Child); + Child->Parent = this; + } + +#ifndef NDEBUG + void print(raw_ostream &O, unsigned IncIndent = 0); + void dump(); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEValue - A debug information entry value. + /// + class DIEValue { + virtual void anchor(); + public: + enum { + isInteger, + isString, + isLabel, + isDelta, + isEntry, + isBlock + }; + protected: + /// Type - Type of data stored in the value. + /// + unsigned Type; + public: + explicit DIEValue(unsigned T) : Type(T) {} + virtual ~DIEValue() {} + + // Accessors + unsigned getType() const { return Type; } + + /// EmitValue - Emit value via the Dwarf writer. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0; + + /// SizeOf - Return the size of a value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0; + +#ifndef NDEBUG + virtual void print(raw_ostream &O) = 0; + void dump(); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEInteger - An integer value DIE. + /// + class DIEInteger : public DIEValue { + uint64_t Integer; + public: + explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} + + /// BestForm - Choose the best form for integer. + /// + static unsigned BestForm(bool IsSigned, uint64_t Int) { + if (IsSigned) { + const int64_t SignedInt = Int; + if ((char)Int == SignedInt) return dwarf::DW_FORM_data1; + if ((short)Int == SignedInt) return dwarf::DW_FORM_data2; + if ((int)Int == SignedInt) return dwarf::DW_FORM_data4; + } else { + if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; + if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; + if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4; + } + return dwarf::DW_FORM_data8; + } + + /// EmitValue - Emit integer of appropriate size. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + uint64_t getValue() const { return Integer; } + + /// SizeOf - Determine size of integer value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *I) { return I->getType() == isInteger; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIELabel - A label expression DIE. + // + class DIELabel : public DIEValue { + const MCSymbol *Label; + public: + explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {} + + /// EmitValue - Emit label value. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// getValue - Get MCSymbol. + /// + const MCSymbol *getValue() const { return Label; } + + /// SizeOf - Determine size of label value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *L) { return L->getType() == isLabel; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEDelta - A simple label difference DIE. 
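+  /// The emitted value is LabelHi minus LabelLo, e.g. a section length
+  /// expressed as Lend - Lbegin.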
+ /// + class DIEDelta : public DIEValue { + const MCSymbol *LabelHi; + const MCSymbol *LabelLo; + public: + DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo) + : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {} + + /// EmitValue - Emit delta value. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// SizeOf - Determine size of delta value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *D) { return D->getType() == isDelta; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEEntry - A pointer to another debug information entry. An instance of + /// this class can also be used as a proxy for a debug information entry not + /// yet defined (ie. types.) + class DIEEntry : public DIEValue { + DIE *const Entry; + public: + explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} + + DIE *getEntry() const { return Entry; } + + /// EmitValue - Emit debug information entry offset. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// SizeOf - Determine size of debug information entry in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const { + return sizeof(int32_t); + } + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isEntry; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEBlock - A block of values. Primarily used for location expressions. + // + class DIEBlock : public DIEValue, public DIE { + unsigned Size; // Size in bytes excluding size header. + public: + DIEBlock() + : DIEValue(isBlock), DIE(0), Size(0) {} + virtual ~DIEBlock() {} + + /// ComputeSize - calculate the size of the block. + /// + unsigned ComputeSize(AsmPrinter *AP); + + /// BestForm - Choose the best form for data. + /// + unsigned BestForm() const { + if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1; + if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2; + if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4; + return dwarf::DW_FORM_block; + } + + /// EmitValue - Emit block data. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// SizeOf - Determine size of block data in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isBlock; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O); +#endif + }; + +} // end llvm namespace + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp new file mode 100644 index 0000000..f58ec9b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -0,0 +1,264 @@ +//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. 
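+// These are the hashed name-lookup tables (emitted, e.g., as the Apple
+// .apple_names and .apple_types sections) that let a debugger find DIEs by
+// name without scanning all of the debug info.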
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfAccelTable.h"
+#include "DIE.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
+  switch (AT) {
+  case eAtomTypeNULL: return "eAtomTypeNULL";
+  case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
+  case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
+  case eAtomTypeTag: return "eAtomTypeTag";
+  case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
+  case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
+  }
+  llvm_unreachable("invalid AtomType!");
+}
+
+// The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
+DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) :
+  Header(8 + (atomList.size() * 4)),
+  HeaderData(atomList),
+  Entries(Allocator) { }
+
+DwarfAccelTable::~DwarfAccelTable() { }
+
+void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) {
+  assert(Data.empty() && "Already finalized!");
+  // If the string is in the list already then add this die to the list
+  // otherwise add a new one.
+  DataArray &DIEs = Entries[Name];
+  DIEs.push_back(new (Allocator) HashDataContents(die, Flags));
+}
+
+void DwarfAccelTable::ComputeBucketCount(void) {
+  // First get the number of unique hashes.
+  std::vector<uint32_t> uniques(Data.size());
+  for (size_t i = 0, e = Data.size(); i < e; ++i)
+    uniques[i] = Data[i]->HashValue;
+  array_pod_sort(uniques.begin(), uniques.end());
+  std::vector<uint32_t>::iterator p =
+    std::unique(uniques.begin(), uniques.end());
+  uint32_t num = std::distance(uniques.begin(), p);
+
+  // Then compute the bucket size, minimum of 1 bucket.
+  if (num > 1024) Header.bucket_count = num/4;
+  else if (num > 16) Header.bucket_count = num/2;
+  else Header.bucket_count = num > 0 ? num : 1;
+
+  Header.hashes_count = num;
+}
+
+// compareDIEs - comparison predicate that sorts DIEs by their offset.
+static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
+                        const DwarfAccelTable::HashDataContents *B) {
+  return A->Die->getOffset() < B->Die->getOffset();
+}
+
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
+  // Create the individual hash data outputs.
+  for (StringMap<DataArray>::iterator
+         EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+
+    // Unique the entries.
+    std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs);
+    EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
+                     EI->second.end());
+
+    HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second);
+    Data.push_back(Entry);
+  }
+
+  // Figure out how many buckets we need, then compute the bucket
+  // contents and the final ordering. We'll emit the hashes and offsets
+  // by doing a walk during the emission phase. We add temporary
+  // symbols to the data so that we can reference them when emitting
+  // the offsets; the symbols themselves are emitted with the data.
+  ComputeBucketCount();
+
+  // Compute bucket contents and final ordering.
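+  // Illustrative example: with Header.bucket_count == 4, an entry whose
+  // HashValue is 10 is appended to bucket 10 % 4 == 2.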
+ Buckets.resize(Header.bucket_count); + for (size_t i = 0, e = Data.size(); i < e; ++i) { + uint32_t bucket = Data[i]->HashValue % Header.bucket_count; + Buckets[bucket].push_back(Data[i]); + Data[i]->Sym = Asm->GetTempSymbol(Prefix, i); + } +} + +// Emits the header for the table via the AsmPrinter. +void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { + Asm->OutStreamer.AddComment("Header Magic"); + Asm->EmitInt32(Header.magic); + Asm->OutStreamer.AddComment("Header Version"); + Asm->EmitInt16(Header.version); + Asm->OutStreamer.AddComment("Header Hash Function"); + Asm->EmitInt16(Header.hash_function); + Asm->OutStreamer.AddComment("Header Bucket Count"); + Asm->EmitInt32(Header.bucket_count); + Asm->OutStreamer.AddComment("Header Hash Count"); + Asm->EmitInt32(Header.hashes_count); + Asm->OutStreamer.AddComment("Header Data Length"); + Asm->EmitInt32(Header.header_data_len); + Asm->OutStreamer.AddComment("HeaderData Die Offset Base"); + Asm->EmitInt32(HeaderData.die_offset_base); + Asm->OutStreamer.AddComment("HeaderData Atom Count"); + Asm->EmitInt32(HeaderData.Atoms.size()); + for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { + Atom A = HeaderData.Atoms[i]; + Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type)); + Asm->EmitInt16(A.type); + Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); + Asm->EmitInt16(A.form); + } +} + +// Walk through and emit the buckets for the table. Each index is +// an offset into the list of hashes. +void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { + unsigned index = 0; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + Asm->OutStreamer.AddComment("Bucket " + Twine(i)); + if (Buckets[i].size() != 0) + Asm->EmitInt32(index); + else + Asm->EmitInt32(UINT32_MAX); + index += Buckets[i].size(); + } +} + +// Walk through the buckets and emit the individual hashes for each +// bucket. +void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); + Asm->EmitInt32((*HI)->HashValue); + } + } +} + +// Walk through the buckets and emit the individual offsets for each +// element in each bucket. This is done via a symbol subtraction from the +// beginning of the section. The non-section symbol will be output later +// when we emit the actual data. +void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); + MCContext &Context = Asm->OutStreamer.getContext(); + const MCExpr *Sub = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), + MCSymbolRefExpr::Create(SecBegin, Context), + Context); + Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t)); + } + } +} + +// Walk through the buckets and emit the full data for each element in +// the bucket. For the string case emit the dies and the various offsets. +// Terminate each HashData bucket with 0. +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { + uint64_t PrevHash = UINT64_MAX; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + // Remember to emit the label for our offset. 
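+      // This is the temporary symbol created in FinalizeTable; the entries
+      // in the offsets section emitted above refer to it.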
+ Asm->OutStreamer.EmitLabel((*HI)->Sym); + Asm->OutStreamer.AddComment((*HI)->Str); + Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str), + D->getStringPoolSym()); + Asm->OutStreamer.AddComment("Num DIEs"); + Asm->EmitInt32((*HI)->Data.size()); + for (ArrayRef<HashDataContents*>::const_iterator + DI = (*HI)->Data.begin(), DE = (*HI)->Data.end(); + DI != DE; ++DI) { + // Emit the DIE offset + Asm->EmitInt32((*DI)->Die->getOffset()); + // If we have multiple Atoms emit that info too. + // FIXME: A bit of a hack, we either emit only one atom or all info. + if (HeaderData.Atoms.size() > 1) { + Asm->EmitInt16((*DI)->Die->getTag()); + Asm->EmitInt8((*DI)->Flags); + } + } + // Emit a 0 to terminate the data unless we have a hash collision. + if (PrevHash != (*HI)->HashValue) + Asm->EmitInt32(0); + PrevHash = (*HI)->HashValue; + } + } +} + +// Emit the entire data structure to the output file. +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, + DwarfUnits *D) { + // Emit the header. + EmitHeader(Asm); + + // Emit the buckets. + EmitBuckets(Asm); + + // Emit the hashes. + EmitHashes(Asm); + + // Emit the offsets. + EmitOffsets(Asm, SecBegin); + + // Emit the hash data. + EmitData(Asm, D); +} + +#ifndef NDEBUG +void DwarfAccelTable::print(raw_ostream &O) { + + Header.print(O); + HeaderData.print(O); + + O << "Entries: \n"; + for (StringMap<DataArray>::const_iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + O << "Name: " << EI->getKeyData() << "\n"; + for (DataArray::const_iterator DI = EI->second.begin(), + DE = EI->second.end(); + DI != DE; ++DI) + (*DI)->print(O); + } + + O << "Buckets and Hashes: \n"; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) + (*HI)->print(O); + + O << "Data: \n"; + for (std::vector<HashData*>::const_iterator + DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) + (*DI)->print(O); + + +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h new file mode 100644 index 0000000..9915bca --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -0,0 +1,283 @@ +//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ +#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ + +#include "DIE.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include <map> +#include <vector> + +// The dwarf accelerator tables are an indirect hash table optimized +// for null lookup rather than access to known data. They are output into +// an on-disk format that looks like this: +// +// .-------------. 
+// | HEADER | +// |-------------| +// | BUCKETS | +// |-------------| +// | HASHES | +// |-------------| +// | OFFSETS | +// |-------------| +// | DATA | +// `-------------' +// +// where the header contains a magic number, version, type of hash function, +// the number of buckets, total number of hashes, and room for a special +// struct of data and the length of that struct. +// +// The buckets contain an index (e.g. 6) into the hashes array. The hashes +// section contains all of the 32-bit hash values in contiguous memory, and +// the offsets contain the offset into the data area for the particular +// hash. +// +// For a lookup example, we could hash a function name and take it modulo the +// number of buckets giving us our bucket. From there we take the bucket value +// as an index into the hashes table and look at each successive hash as long +// as the hash value is still the same modulo result (bucket value) as earlier. +// If we have a match we look at that same entry in the offsets table and +// grab the offset in the data for our final match. + +namespace llvm { + +class AsmPrinter; +class DIE; +class DwarfUnits; + +class DwarfAccelTable { + + enum HashFunctionType { + eHashFunctionDJB = 0u + }; + + static uint32_t HashDJB (StringRef Str) { + uint32_t h = 5381; + for (unsigned i = 0, e = Str.size(); i != e; ++i) + h = ((h << 5) + h) + Str[i]; + return h; + } + + // Helper function to compute the number of buckets needed based on + // the number of unique hashes. + void ComputeBucketCount (void); + + struct TableHeader { + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number. + uint16_t hash_function; // The hash function enumeration that was used. + uint32_t bucket_count; // The number of buckets in this hash table. + uint32_t hashes_count; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. + // Also written to disk is the implementation specific header data. + + static const uint32_t MagicHash = 0x48415348; + + TableHeader (uint32_t data_len) : + magic (MagicHash), version (1), hash_function (eHashFunctionDJB), + bucket_count (0), hashes_count (0), header_data_len (data_len) + {} + +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Magic: " << format("0x%x", magic) << "\n" + << "Version: " << version << "\n" + << "Hash Function: " << hash_function << "\n" + << "Bucket Count: " << bucket_count << "\n" + << "Header Data Length: " << header_data_len << "\n"; + } + void dump() { print(dbgs()); } +#endif + }; + +public: + // The HeaderData describes the form of each set of data. In general this + // is as a list of atoms (atom_count) where each atom contains a type + // (AtomType type) of data, and an encoding form (form). In the case of + // data that is referenced via DW_FORM_ref_* the die_offset_base is + // used to describe the offset for all forms in the list of atoms. + // This also serves as a public interface of sorts. 
+  // When written to disk this will have the form:
+  //
+  // uint32_t die_offset_base
+  // uint32_t atom_count
+  // atom_count Atoms
+  enum AtomType {
+    eAtomTypeNULL = 0u,
+    eAtomTypeDIEOffset = 1u,  // DIE offset, check form for encoding
+    eAtomTypeCUOffset = 2u,   // DIE offset of the compile unit header that
+                              // contains the item in question
+    eAtomTypeTag = 3u,        // DW_TAG_xxx value, should be encoded as
+                              // DW_FORM_data1 (if no tags exceed 255) or
+                              // DW_FORM_data2.
+    eAtomTypeNameFlags = 4u,  // Flags from enum NameFlags
+    eAtomTypeTypeFlags = 5u   // Flags from enum TypeFlags
+  };
+
+  enum TypeFlags {
+    eTypeFlagClassMask = 0x0000000fu,
+
+    // Always set for C++, only set for ObjC if this is the
+    // @implementation for a class.
+    eTypeFlagClassIsImplementation = ( 1u << 1 )
+  };
+
+  // Make these public so that they can be used as a general interface to
+  // the class.
+  struct Atom {
+    AtomType type; // enum AtomType
+    uint16_t form; // DWARF DW_FORM_ defines
+
+    Atom(AtomType type, uint16_t form) : type(type), form(form) {}
+    static const char * AtomTypeString(enum AtomType);
+#ifndef NDEBUG
+    void print(raw_ostream &O) {
+      O << "Type: " << AtomTypeString(type) << "\n"
+        << "Form: " << dwarf::FormEncodingString(form) << "\n";
+    }
+    void dump() {
+      print(dbgs());
+    }
+#endif
+  };
+
+ private:
+  struct TableHeaderData {
+    uint32_t die_offset_base;
+    SmallVector<Atom, 1> Atoms;
+
+    TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
+      : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { }
+
+#ifndef NDEBUG
+    void print (raw_ostream &O) {
+      O << "die_offset_base: " << die_offset_base << "\n";
+      for (size_t i = 0; i < Atoms.size(); i++)
+        Atoms[i].print(O);
+    }
+    void dump() {
+      print(dbgs());
+    }
+#endif
+  };
+
+  // The data itself consists of a str_offset, a count of the DIEs in the
+  // hash and the offsets to the DIEs themselves.
+  // On disk each data section is ended with a 0 KeyType as the end of the
+  // hash chain.
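+  // Names whose hashes collide share a single chain, which is why EmitData
+  // suppresses the terminator between entries with equal hash values.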
+ // On output this looks like: + // uint32_t str_offset + // uint32_t hash_data_count + // HashData[hash_data_count] +public: + struct HashDataContents { + DIE *Die; // Offsets + char Flags; // Specific flags to output + + HashDataContents(DIE *D, char Flags) : + Die(D), + Flags(Flags) { } + #ifndef NDEBUG + void print(raw_ostream &O) const { + O << " Offset: " << Die->getOffset() << "\n"; + O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"; + O << " Flags: " << Flags << "\n"; + } + #endif + }; +private: + struct HashData { + StringRef Str; + uint32_t HashValue; + MCSymbol *Sym; + ArrayRef<HashDataContents*> Data; // offsets + HashData(StringRef S, ArrayRef<HashDataContents*> Data) + : Str(S), Data(Data) { + HashValue = DwarfAccelTable::HashDJB(S); + } + #ifndef NDEBUG + void print(raw_ostream &O) { + O << "Name: " << Str << "\n"; + O << " Hash Value: " << format("0x%x", HashValue) << "\n"; + O << " Symbol: " ; + if (Sym) Sym->print(O); + else O << "<none>"; + O << "\n"; + for (size_t i = 0; i < Data.size(); i++) { + O << " Offset: " << Data[i]->Die->getOffset() << "\n"; + O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n"; + O << " Flags: " << Data[i]->Flags << "\n"; + } + } + void dump() { + print(dbgs()); + } + #endif + }; + + DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; + void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; + + // Internal Functions + void EmitHeader(AsmPrinter *); + void EmitBuckets(AsmPrinter *); + void EmitHashes(AsmPrinter *); + void EmitOffsets(AsmPrinter *, MCSymbol *); + void EmitData(AsmPrinter *, DwarfUnits *D); + + // Allocator for HashData and HashDataContents. + BumpPtrAllocator Allocator; + + // Output Variables + TableHeader Header; + TableHeaderData HeaderData; + std::vector<HashData*> Data; + + // String Data + typedef std::vector<HashDataContents*> DataArray; + typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries; + StringEntries Entries; + + // Buckets/Hashes/Offsets + typedef std::vector<HashData*> HashList; + typedef std::vector<HashList> BucketList; + BucketList Buckets; + HashList Hashes; + + // Public Implementation + public: + DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); + ~DwarfAccelTable(); + void AddName(StringRef, DIE*, char = 0); + void FinalizeTable(AsmPrinter *, const char *); + void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *); +#ifndef NDEBUG + void print(raw_ostream &O); + void dump() { print(dbgs()); } +#endif +}; + +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp new file mode 100644 index 0000000..fec5ced --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -0,0 +1,156 @@ +//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. 
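+// It does so by driving the MC-level CFI directives (.cfi_startproc,
+// .cfi_personality, .cfi_lsda, ...) rather than emitting frame moves by hand.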
+// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +DwarfCFIException::DwarfCFIException(AsmPrinter *A) + : DwarfException(A), + shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false), + moveTypeModule(AsmPrinter::CFI_M_None) {} + +DwarfCFIException::~DwarfCFIException() {} + +/// EndModule - Emit all exception information that should come after the +/// content. +void DwarfCFIException::EndModule() { + if (moveTypeModule == AsmPrinter::CFI_M_Debug) + Asm->OutStreamer.EmitCFISections(false, true); + + if (!Asm->MAI->isExceptionHandlingDwarf()) + return; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + + if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel) + return; + + // Emit references to all used personality functions + bool AtLeastOne = false; + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); + for (size_t i = 0, e = Personalities.size(); i != e; ++i) { + if (!Personalities[i]) + continue; + MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]); + TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); + AtLeastOne = true; + } + + if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) { + // This is a temporary hack to keep sections in the same order they + // were before. This lets us produce bit identical outputs while + // transitioning to CFI. + Asm->OutStreamer.SwitchSection( + const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection()); + } +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void DwarfCFIException::BeginFunction(const MachineFunction *MF) { + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + + // If any landing pads survive, we need an EH table. + bool hasLandingPads = !MMI->getLandingPads().empty(); + + // See if we need frame move info. 
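+  // needsCFIMoves() answers per function; moveTypeModule records the
+  // strongest request seen so far (CFI_M_EH wins over CFI_M_Debug) so that
+  // EndModule knows whether to ask the streamer for .debug_frame CFI.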
+ AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); + if (MoveType == AsmPrinter::CFI_M_EH || + (MoveType == AsmPrinter::CFI_M_Debug && + moveTypeModule == AsmPrinter::CFI_M_None)) + moveTypeModule = MoveType; + + shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + + shouldEmitPersonality = hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per; + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; + + if (!shouldEmitPersonality && !shouldEmitMoves) + return; + + Asm->OutStreamer.EmitCFIStartProc(); + + // Indicate personality routine, if any. + if (!shouldEmitPersonality) + return; + + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); + + Asm->OutStreamer.EmitDebugLabel + (Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); + + // Provide LSDA information. + if (!shouldEmitLSDA) + return; + + Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", + Asm->getFunctionNumber()), + LSDAEncoding); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void DwarfCFIException::EndFunction() { + if (!shouldEmitPersonality && !shouldEmitMoves) + return; + + Asm->OutStreamer.EmitCFIEndProc(); + + if (!shouldEmitPersonality) + return; + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + EmitExceptionTable(); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp new file mode 100644 index 0000000..f9b6f94 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -0,0 +1,1711 @@ +//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for constructing a dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" + +#include "DwarfCompileUnit.h" +#include "DwarfAccelTable.h" +#include "DwarfDebug.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/DIBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +/// CompileUnit - Compile unit constructor. +CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, + DwarfDebug *DW, DwarfUnits *DWU) + : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU), + IndexTyDie(0), DebugInfoOffset(0) { + DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); +} + +/// ~CompileUnit - Destructor for compile unit. 
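+/// DIEBlocks live on the unit's bump allocator, so only their destructors are
+/// run here; the underlying memory is reclaimed with the allocator itself.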
+CompileUnit::~CompileUnit() {
+  for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+    DIEBlocks[j]->~DIEBlock();
+}
+
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
+  DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
+  return Value;
+}
+
+/// getDefaultLowerBound - Return the default lower bound for an array. If the
+/// DWARF version doesn't handle the language, return -1.
+int64_t CompileUnit::getDefaultLowerBound() const {
+  switch (Language) {
+  default:
+    break;
+
+  case dwarf::DW_LANG_C89:
+  case dwarf::DW_LANG_C99:
+  case dwarf::DW_LANG_C:
+  case dwarf::DW_LANG_C_plus_plus:
+  case dwarf::DW_LANG_ObjC:
+  case dwarf::DW_LANG_ObjC_plus_plus:
+    return 0;
+
+  case dwarf::DW_LANG_Fortran77:
+  case dwarf::DW_LANG_Fortran90:
+  case dwarf::DW_LANG_Fortran95:
+    return 1;
+
+  // The languages below have valid values only if the DWARF version >= 4.
+  case dwarf::DW_LANG_Java:
+  case dwarf::DW_LANG_Python:
+  case dwarf::DW_LANG_UPC:
+  case dwarf::DW_LANG_D:
+    if (dwarf::DWARF_VERSION >= 4)
+      return 0;
+    break;
+
+  case dwarf::DW_LANG_Ada83:
+  case dwarf::DW_LANG_Ada95:
+  case dwarf::DW_LANG_Cobol74:
+  case dwarf::DW_LANG_Cobol85:
+  case dwarf::DW_LANG_Modula2:
+  case dwarf::DW_LANG_Pascal83:
+  case dwarf::DW_LANG_PLI:
+    if (dwarf::DWARF_VERSION >= 4)
+      return 1;
+    break;
+  }
+
+  return -1;
+}
+
+/// addFlag - Add a flag that is true.
+void CompileUnit::addFlag(DIE *Die, unsigned Attribute) {
+  if (!DD->useDarwinGDBCompat())
+    Die->addValue(Attribute, dwarf::DW_FORM_flag_present,
+                  DIEIntegerOne);
+  else
+    addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1);
+}
+
+/// addUInt - Add an unsigned integer attribute data and value.
+///
+void CompileUnit::addUInt(DIE *Die, unsigned Attribute,
+                          unsigned Form, uint64_t Integer) {
+  if (!Form) Form = DIEInteger::BestForm(false, Integer);
+  DIEValue *Value = Integer == 1 ?
+    DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addSInt - Add a signed integer attribute data and value.
+///
+void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
+                          unsigned Form, int64_t Integer) {
+  if (!Form) Form = DIEInteger::BestForm(true, Integer);
+  DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addString - Add a string attribute data and value. We always emit a
+/// reference to the string pool instead of immediate strings so that DIEs have
+/// more predictable sizes. In the case of split dwarf we emit an index
+/// into another table which gets us the static offset into the string
+/// table.
+void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) {
+  if (!DD->useSplitDwarf()) {
+    MCSymbol *Symb = DU->getStringPoolEntry(String);
+    DIEValue *Value;
+    if (Asm->needsRelocationsForDwarfStringPool())
+      Value = new (DIEValueAllocator) DIELabel(Symb);
+    else {
+      MCSymbol *StringPool = DU->getStringPoolSym();
+      Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
+    }
+    Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
+  } else {
+    unsigned idx = DU->getStringPoolIndex(String);
+    DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+    Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value);
+  }
+}
+
+/// addLocalString - Add a string attribute data and value. This is guaranteed
+/// to be in the local string pool instead of indirected.
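+/// Unlike addString above, this always uses DW_FORM_strp (or a delta from the
+/// string pool base), never DW_FORM_GNU_str_index, even under split dwarf.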
+void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, + StringRef String) { + MCSymbol *Symb = DU->getStringPoolEntry(String); + DIEValue *Value; + if (Asm->needsRelocationsForDwarfStringPool()) + Value = new (DIEValueAllocator) DIELabel(Symb); + else { + MCSymbol *StringPool = DU->getStringPoolSym(); + Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); + } + Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); +} + +/// addLabel - Add a Dwarf label attribute data and value. +/// +void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, Form, Value); +} + +/// addLabelAddress - Add a dwarf label attribute data and value using +/// DW_FORM_addr or DW_FORM_GNU_addr_index. +/// +void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, + MCSymbol *Label) { + if (!DD->useSplitDwarf()) { + if (Label != NULL) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); + } else { + DIEValue *Value = new (DIEValueAllocator) DIEInteger(0); + Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); + } + } else { + unsigned idx = DU->getAddrPoolIndex(Label); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); + } +} + +/// addOpAddress - Add a dwarf op address data and value using the +/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. +/// +void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) { + + if (!DD->useSplitDwarf()) { + addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, 0, dwarf::DW_FORM_udata, Sym); + } else { + unsigned idx = DU->getAddrPoolIndex(Sym); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value); + } +} + +/// addDelta - Add a label delta attribute data and value. +/// +void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die->addValue(Attribute, Form, Value); +} + +/// addDIEEntry - Add a DIE attribute data and value. +/// +void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, + DIE *Entry) { + Die->addValue(Attribute, Form, createDIEEntry(Entry)); +} + +/// addBlock - Add block data. +/// +void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, + DIEBlock *Block) { + Block->ComputeSize(Asm); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. + Die->addValue(Attribute, Block->BestForm(), Block); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { + // Verify variable. + if (!V.Verify()) + return; + + unsigned Line = V.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory(), + getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { + // Verify global variable. 
+ if (!G.Verify()) + return; + + unsigned Line = G.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), + getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { + // Verify subprogram. + if (!SP.Verify()) + return; + + // If the line number is 0, don't add it. + unsigned Line = SP.getLineNumber(); + if (Line == 0) + return; + + unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), + SP.getDirectory(), getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), + Ty.getDirectory(), getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0) + return; + DIFile File = Ty.getFile(); + unsigned FileID = DD->getOrCreateSourceID(File.getFilename(), + File.getDirectory(), getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { + // Verify namespace. + if (!NS.Verify()) + return; + + unsigned Line = NS.getLineNumber(); + if (Line == 0) + return; + StringRef FN = NS.getFilename(); + + unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(), + getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addVariableAddress - Add DW_AT_location attribute for a +/// DbgVariable based on provided MachineLocation. +void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, + MachineLocation Location) { + if (DV->variableHasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV->isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location); +} + +/// addRegisterOp - Add register operand. +void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + if (DWReg < 32) + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); + else { + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + } +} + +/// addRegisterOffset - Add register offset. 
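+/// For example, a variable at frame-pointer - 8 becomes DW_OP_fbreg -8, one
+/// based at register 5 + 16 becomes DW_OP_breg5 +16, and registers numbered
+/// 32 or higher use DW_OP_bregx <reg> <offset>.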
+void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, + int64_t Offset) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + if (Reg == TRI->getFrameRegister(*Asm->MF)) + // If variable offset is based in frame register then use fbreg. + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + else if (DWReg < 32) + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); + else { + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + } + addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset); +} + +/// addAddress - Add an address attribute to a die based on the location +/// provided. +void CompileUnit::addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) + addRegisterOp(Block, Location.getReg()); + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DIVariable, starting from +/// the starting location. Add the DWARF information to the die. +/// +void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned N = DV->getNumAddrElements(); + unsigned i = 0; + if (Location.isReg()) { + if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. + addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1)); + i = 2; + } else + addRegisterOp(Block, Location.getReg()); + } + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + for (;i < N; ++i) { + uint64_t Element = DV->getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) { + if (!Location.isReg()) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else llvm_unreachable("unknown DIBuilder Opcode"); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function getBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. + However now the debugger will expect the variable VarName to have the type + SomeType. 
So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... <various fields> + struct __Block_byref_x_VarName *forwarding; + ... <various other fields> + SomeType VarName; + ... <maybe more fields> + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst <forward_fld_offset> + DW_OP_deref + DW_OP_plus_uconst <varName_fld_offset> + + That is what this function does. */ + +/// addBlockByrefAddress - Start with the address based on the location +/// provided, and generate the DWARF information necessary to find the +/// actual Block variable (navigating the Block struct) based on the +/// starting location. Add the DWARF information to the die. For +/// more information, read large comment just above here. +/// +void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIType Ty = DV->getType(); + DIType TmpTy = Ty; + unsigned Tag = Ty.getTag(); + bool isPointer = false; + + StringRef varName = DV->getName(); + + if (Tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + TmpTy = DTy.getTypeDerivedFrom(); + isPointer = true; + } + + DICompositeType blockStruct = DICompositeType(TmpTy); + + // Find the __forwarding field and the variable field in the __Block_byref + // struct. + DIArray Fields = blockStruct.getTypeArray(); + DIDescriptor varField = DIDescriptor(); + DIDescriptor forwardingField = DIDescriptor(); + + for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { + DIDescriptor Element = Fields.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + StringRef fieldName = DT.getName(); + if (fieldName == "__forwarding") + forwardingField = Element; + else if (fieldName == varName) + varField = Element; + } + + // Get the offsets for the forwarding field and the variable field. + unsigned forwardingFieldOffset = + DIDerivedType(forwardingField).getOffsetInBits() >> 3; + unsigned varFieldOffset = + DIDerivedType(varField).getOffsetInBits() >> 3; + + // Decode the original location, and use that as the start of the byref + // variable's location. + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) + addRegisterOp(Block, Location.getReg()); + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + // If we started with a pointer to the __Block_byref... struct, then + // the first thing we need to do is dereference the pointer (DW_OP_deref). 
+ if (isPointer) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Next add the offset for the '__forwarding' field: + // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in + // adding the offset if it's 0. + if (forwardingFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + } + + // Now dereference the __forwarding field to get to the real __Block_byref + // struct: DW_OP_deref. + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// isTypeSigned - Return true if the type is signed. +static bool isTypeSigned(DIType Ty, int *SizeInBits) { + if (Ty.isDerivedType()) + return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits); + if (Ty.isBasicType()) + if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed + || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { + *SizeInBits = Ty.getSizeInBits(); + return true; + } + return false; +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, + DIType Ty) { + assert(MO.isImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + int SizeInBits = -1; + bool SignedConstant = isTypeSigned(Ty, &SizeInBits); + unsigned Form = SignedConstant ? dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata; + switch (SizeInBits) { + case 8: Form = dwarf::DW_FORM_data1; break; + case 16: Form = dwarf::DW_FORM_data2; break; + case 32: Form = dwarf::DW_FORM_data4; break; + case 64: Form = dwarf::DW_FORM_data8; break; + default: break; + } + SignedConstant ? addSInt(Block, 0, Form, MO.getImm()) + : addUInt(Block, 0, Form, MO.getImm()); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { + assert (MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. 
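+/// This overload forwards the raw APInt to addConstantValue, so, for
+/// example, 1.0f (bit pattern 0x3F800000) is emitted as a single
+/// DW_FORM_data4 value, while payloads wider than 64 bits (such as a
+/// 128-bit long double) fall back to emitting the raw bytes one
+/// DW_FORM_data1 at a time.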
+bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { + return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false); +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, + bool Unsigned) { + return addConstantValue(Die, CI->getValue(), Unsigned); +} + +// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, + bool Unsigned) { + unsigned CIBitWidth = Val.getBitWidth(); + if (CIBitWidth <= 64) { + unsigned form = 0; + switch (CIBitWidth) { + case 8: form = dwarf::DW_FORM_data1; break; + case 16: form = dwarf::DW_FORM_data2; break; + case 32: form = dwarf::DW_FORM_data4; break; + case 64: form = dwarf::DW_FORM_data8; break; + default: + form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata; + } + if (Unsigned) + addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue()); + else + addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue()); + return true; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + // Get the raw data form of the large APInt. + const uint64_t *Ptr64 = Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); + + // Output the constant to DWARF one byte at a time. + for (int i = 0; i < NumBytes; i++) { + uint8_t c; + if (LittleEndian) + c = Ptr64[i / 8] >> (8 * (i & 7)); + else + c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); + addUInt(Block, 0, dwarf::DW_FORM_data1, c); + } + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addTemplateParams - Add template parameters in buffer. +void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { + // Add template parameters. + for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { + DIDescriptor Element = TParams.getElement(i); + if (Element.isTemplateTypeParameter()) + Buffer.addChild(getOrCreateTemplateTypeParameterDIE( + DITemplateTypeParameter(Element))); + else if (Element.isTemplateValueParameter()) + Buffer.addChild(getOrCreateTemplateValueParameterDIE( + DITemplateValueParameter(Element))); + } +} + +/// getOrCreateContextDIE - Get context owner's DIE. +DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) { + if (Context.isType()) + return getOrCreateTypeDIE(DIType(Context)); + else if (Context.isNameSpace()) + return getOrCreateNameSpace(DINameSpace(Context)); + else if (Context.isSubprogram()) + return getOrCreateSubprogramDIE(DISubprogram(Context)); + else + return getDIE(Context); +} + +/// addToContextOwner - Add Die into the list of its context owner's children. +void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { + if (DIE *ContextDIE = getOrCreateContextDIE(Context)) + ContextDIE->addChild(Die); + else + addDie(Die); +} + +/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the +/// given DIType. +DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { + DIType Ty(TyNode); + if (!Ty.Verify()) + return NULL; + DIE *TyDIE = getDIE(Ty); + if (TyDIE) + return TyDIE; + + // Create new type. 
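+  // Note: the DW_TAG_base_type used at creation is only a placeholder; each
+  // constructTypeDIE overload below calls Buffer.setTag() to install the tag
+  // that matches the actual kind of DIType.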
+  TyDIE = new DIE(dwarf::DW_TAG_base_type);
+  insertDIE(Ty, TyDIE);
+  if (Ty.isBasicType())
+    constructTypeDIE(*TyDIE, DIBasicType(Ty));
+  else if (Ty.isCompositeType())
+    constructTypeDIE(*TyDIE, DICompositeType(Ty));
+  else {
+    assert(Ty.isDerivedType() && "Unknown kind of DIType");
+    constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+  }
+  // If this is a named finished type then include it in the list of types
+  // for the accelerator tables.
+  if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
+    bool IsImplementation = false;
+    if (Ty.isCompositeType()) {
+      DICompositeType CT(Ty);
+      // A runtime language of 0 actually means C/C++ and that any
+      // non-negative value is some version of Objective-C/C++.
+      IsImplementation = (CT.getRunTimeLang() == 0) ||
+        CT.isObjcClassComplete();
+    }
+    unsigned Flags = IsImplementation ?
+                     DwarfAccelTable::eTypeFlagClassIsImplementation : 0;
+    addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
+  }
+
+  addToContextOwner(TyDIE, Ty.getContext());
+  return TyDIE;
+}
+
+/// addType - Add a new type attribute to the specified entity.
+void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
+  if (!Ty.Verify())
+    return;
+
+  // Check for pre-existence.
+  DIEEntry *Entry = getDIEEntry(Ty);
+  // If it exists then use the existing value.
+  if (Entry) {
+    Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
+    return;
+  }
+
+  // Construct type.
+  DIE *Buffer = getOrCreateTypeDIE(Ty);
+
+  // Set up proxy.
+  Entry = createDIEEntry(Buffer);
+  insertDIEEntry(Ty, Entry);
+  Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
+
+  // If this is a complete composite type then include it in the
+  // list of global types.
+  addGlobalType(Ty);
+}
+
+/// addGlobalType - Add a new global type to the compile unit.
+///
+void CompileUnit::addGlobalType(DIType Ty) {
+  DIDescriptor Context = Ty.getContext();
+  if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
+      && (!Context || Context.isCompileUnit() || Context.isFile()
+          || Context.isNameSpace()))
+    if (DIEEntry *Entry = getDIEEntry(Ty))
+      GlobalTypes[Ty.getName()] = Entry->getEntry();
+}
+
+/// addPubTypes - Add type for pubtypes section.
+void CompileUnit::addPubTypes(DISubprogram SP) {
+  DICompositeType SPTy = SP.getType();
+  unsigned SPTag = SPTy.getTag();
+  if (SPTag != dwarf::DW_TAG_subroutine_type)
+    return;
+
+  DIArray Args = SPTy.getTypeArray();
+  for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
+    DIType ATy(Args.getElement(i));
+    if (!ATy.Verify())
+      continue;
+    addGlobalType(ATy);
+  }
+}
+
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+  // Get core information.
+  StringRef Name = BTy.getName();
+  // Add name if not anonymous or intermediate type.
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, Name);
+
+  if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) {
+    Buffer.setTag(dwarf::DW_TAG_unspecified_type);
+    // Unspecified types have only a name, nothing else.
+    return;
+  }
+
+  Buffer.setTag(dwarf::DW_TAG_base_type);
+  addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+          BTy.getEncoding());
+
+  uint64_t Size = BTy.getSizeInBits() >> 3;
+  addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+  // Get core information. 
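+  // For example, a 'const int *' is described by a chain of derived-type
+  // DIEs: DW_TAG_pointer_type -> DW_TAG_const_type -> the 'int' base type,
+  // where each DW_AT_type link is produced by the addType call below.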
+ StringRef Name = DTy.getName(); + uint64_t Size = DTy.getSizeInBits() >> 3; + unsigned Tag = DTy.getTag(); + + // FIXME - Workaround for templates. + if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; + + Buffer.setTag(Tag); + + // Map to main type, void will not have a type. + DIType FromTy = DTy.getTypeDerivedFrom(); + addType(&Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size && Tag != dwarf::DW_TAG_pointer_type) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + + if (Tag == dwarf::DW_TAG_ptr_to_member_type) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DTy.getClassType())); + // Add source line info if available and TyDesc is not a forward declaration. + if (!DTy.isForwardDecl()) + addSourceLine(&Buffer, DTy); +} + +/// constructTypeDIE - Construct type DIE from DICompositeType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { + // Get core information. + StringRef Name = CTy.getName(); + + uint64_t Size = CTy.getSizeInBits() >> 3; + unsigned Tag = CTy.getTag(); + Buffer.setTag(Tag); + + switch (Tag) { + case dwarf::DW_TAG_array_type: + constructArrayTypeDIE(Buffer, &CTy); + break; + case dwarf::DW_TAG_enumeration_type: { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIE *ElemDie = NULL; + DIDescriptor Enum(Elements.getElement(i)); + if (Enum.isEnumerator()) { + ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); + Buffer.addChild(ElemDie); + } + } + DIType DTy = CTy.getTypeDerivedFrom(); + if (DTy.Verify()) { + addType(&Buffer, DTy); + addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1); + } + } + break; + case dwarf::DW_TAG_subroutine_type: { + // Add return type. + DIArray Elements = CTy.getTypeArray(); + DIDescriptor RTy = Elements.getElement(0); + addType(&Buffer, DIType(RTy)); + + bool isPrototyped = true; + // Add arguments. + for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Ty = Elements.getElement(i); + if (Ty.isUnspecifiedParameter()) { + DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); + Buffer.addChild(Arg); + isPrototyped = false; + } else { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Ty)); + if (DIType(Ty).isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); + Buffer.addChild(Arg); + } + } + // Add prototype flag if we're dealing with a C language and the + // function has been prototyped. + if (isPrototyped && + (Language == dwarf::DW_LANG_C89 || + Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) + addFlag(&Buffer, dwarf::DW_AT_prototyped); + } + break; + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Add elements to structure type. + DIArray Elements = CTy.getTypeArray(); + + // A forward struct declared type may not have elements available. + unsigned N = Elements.getNumElements(); + if (N == 0) + break; + + // Add elements to structure type. 
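+    // Each element is dispatched on its kind below: member functions become
+    // subprogram DIEs with accessibility attributes, derived types become
+    // friend, static member or regular member DIEs, and Objective-C
+    // properties become property DIEs carrying getter/setter names and
+    // property attributes.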
+ for (unsigned i = 0; i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIE *ElemDie = NULL; + if (Element.isSubprogram()) { + DISubprogram SP(Element); + ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); + if (SP.isProtected()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (SP.isPrivate()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + else + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + if (SP.isExplicit()) + addFlag(ElemDie, dwarf::DW_AT_explicit); + } else if (Element.isDerivedType()) { + DIDerivedType DDTy(Element); + if (DDTy.getTag() == dwarf::DW_TAG_friend) { + ElemDie = new DIE(dwarf::DW_TAG_friend); + addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); + } else if (DDTy.isStaticMember()) + ElemDie = createStaticMemberDIE(DDTy); + else + ElemDie = createMemberDIE(DDTy); + } else if (Element.isObjCProperty()) { + DIObjCProperty Property(Element); + ElemDie = new DIE(Property.getTag()); + StringRef PropertyName = Property.getObjCPropertyName(); + addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); + addType(ElemDie, Property.getType()); + addSourceLine(ElemDie, Property); + StringRef GetterName = Property.getObjCPropertyGetterName(); + if (!GetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); + StringRef SetterName = Property.getObjCPropertySetterName(); + if (!SetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); + unsigned PropertyAttributes = 0; + if (Property.isReadOnlyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; + if (Property.isReadWriteObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; + if (Property.isAssignObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; + if (Property.isRetainObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; + if (Property.isCopyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; + if (Property.isNonAtomicObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; + if (PropertyAttributes) + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0, + PropertyAttributes); + + DIEEntry *Entry = getDIEEntry(Element); + if (!Entry) { + Entry = createDIEEntry(ElemDie); + insertDIEEntry(Element, Entry); + } + } else + continue; + Buffer.addChild(ElemDie); + } + + if (CTy.isAppleBlockExtension()) + addFlag(&Buffer, dwarf::DW_AT_APPLE_block); + + DICompositeType ContainingType = CTy.getContainingType(); + if (DIDescriptor(ContainingType).isCompositeType()) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DIType(ContainingType))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } + + if (CTy.isObjcClassComplete()) + addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type); + + // Add template parameters to a class, structure or union types. + // FIXME: The support isn't in the metadata for this yet. + if (Tag == dwarf::DW_TAG_class_type || + Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_union_type) + addTemplateParams(Buffer, CTy.getTemplateParams()); + + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. 
+ if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || + Tag == dwarf::DW_TAG_class_type || + Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_union_type) { + // Add size if non-zero (derived types might be zero-sized.) + // TODO: Do we care about size for enum forward declarations? + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + else if (!CTy.isForwardDecl()) + // Add zero size if it is not a forward declaration. + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + + // If we're a forward decl, say so. + if (CTy.isForwardDecl()) + addFlag(&Buffer, dwarf::DW_AT_declaration); + + // Add source line info if available. + if (!CTy.isForwardDecl()) + addSourceLine(&Buffer, CTy); + + // No harm in adding the runtime language to the declaration. + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); + } +} + +/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateTypeParameter. +DIE * +CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { + DIE *ParamDIE = getDIE(TP); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); + addType(ParamDIE, TP.getType()); + addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); + return ParamDIE; +} + +/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateValueParameter. +DIE * +CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){ + DIE *ParamDIE = getDIE(TPV); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); + addType(ParamDIE, TPV.getType()); + if (!TPV.getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, TPV.getName()); + addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + TPV.getValue()); + return ParamDIE; +} + +/// getOrCreateNameSpace - Create a DIE for DINameSpace. +DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { + DIE *NDie = getDIE(NS); + if (NDie) + return NDie; + NDie = new DIE(dwarf::DW_TAG_namespace); + insertDIE(NS, NDie); + if (!NS.getName().empty()) { + addString(NDie, dwarf::DW_AT_name, NS.getName()); + addAccelNamespace(NS.getName(), NDie); + } else + addAccelNamespace("(anonymous namespace)", NDie); + addSourceLine(NDie, NS); + addToContextOwner(NDie, NS.getContext()); + return NDie; +} + +/// getRealLinkageName - If special LLVM prefix that is used to inform the asm +/// printer to not emit usual symbol prefix before the symbol name is used then +/// return linkage name after skipping this special LLVM prefix. +static StringRef getRealLinkageName(StringRef LinkageName) { + char One = '\1'; + if (LinkageName.startswith(StringRef(&One, 1))) + return LinkageName.substr(1); + return LinkageName; +} + +/// getOrCreateSubprogramDIE - Create new DIE using SP. +DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { + DIE *SPDie = getDIE(SP); + if (SPDie) + return SPDie; + + SPDie = new DIE(dwarf::DW_TAG_subprogram); + + // DW_TAG_inlined_subroutine may refer to this DIE. + insertDIE(SP, SPDie); + + DISubprogram SPDecl = SP.getFunctionDeclaration(); + DIE *DeclDie = NULL; + if (SPDecl.isSubprogram()) { + DeclDie = getOrCreateSubprogramDIE(SPDecl); + } + + // Add to context owner. + addToContextOwner(SPDie, SP.getContext()); + + // Add function template parameters. 
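+  // For example, 'template <typename T, int N> void f()' contributes a
+  // DW_TAG_template_type_parameter DIE for T and a
+  // DW_TAG_template_value_parameter DIE (carrying DW_AT_const_value N) for N.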
+ addTemplateParams(*SPDie, SP.getTemplateParams()); + + // Unfortunately this code needs to stay here instead of below the + // AT_specification code in order to work around a bug in older + // gdbs that requires the linkage name to resolve multiple template + // functions. + // TODO: Remove this set of code when we get rid of the old gdb + // compatibility. + StringRef LinkageName = SP.getLinkageName(); + if (!LinkageName.empty() && DD->useDarwinGDBCompat()) + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); + + // If this DIE is going to refer declaration info using AT_specification + // then there is no need to add other attributes. + if (DeclDie) { + // Refer function declaration directly. + addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + DeclDie); + + return SPDie; + } + + // Add the linkage name if we have one. + if (!LinkageName.empty() && !DD->useDarwinGDBCompat()) + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); + + // Constructors and operators for anonymous aggregates do not have names. + if (!SP.getName().empty()) + addString(SPDie, dwarf::DW_AT_name, SP.getName()); + + addSourceLine(SPDie, SP); + + // Add the prototype if we have a prototype and we have a C like + // language. + if (SP.isPrototyped() && + (Language == dwarf::DW_LANG_C89 || + Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) + addFlag(SPDie, dwarf::DW_AT_prototyped); + + // Add Return Type. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + + if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) + addType(SPDie, SPTy); + else + addType(SPDie, DIType(Args.getElement(0))); + + unsigned VK = SP.getVirtuality(); + if (VK) { + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); + DIEBlock *Block = getDIEBlock(); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); + ContainingTypeMap.insert(std::make_pair(SPDie, + SP.getContainingType())); + } + + if (!SP.isDefinition()) { + addFlag(SPDie, dwarf::DW_AT_declaration); + + // Add arguments. Do not add arguments for subprogram definition. They will + // be handled while processing variables. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(Args.getElement(i)); + addType(Arg, ATy); + if (ATy.isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); + SPDie->addChild(Arg); + } + } + + if (SP.isArtificial()) + addFlag(SPDie, dwarf::DW_AT_artificial); + + if (!SP.isLocalToUnit()) + addFlag(SPDie, dwarf::DW_AT_external); + + if (SP.isOptimized()) + addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); + + if (unsigned isa = Asm->getISAEncoding()) { + addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + } + + return SPDie; +} + +// Return const expression if value is a GEP to access merged global +// constant. e.g. 
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) +static const ConstantExpr *getMergedGlobalExpr(const Value *V) { + const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); + if (!CE || CE->getNumOperands() != 3 || + CE->getOpcode() != Instruction::GetElementPtr) + return NULL; + + // First operand points to a global struct. + Value *Ptr = CE->getOperand(0); + if (!isa<GlobalValue>(Ptr) || + !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType())) + return NULL; + + // Second operand is zero. + const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); + if (!CI || !CI->isZero()) + return NULL; + + // Third operand is offset. + if (!isa<ConstantInt>(CE->getOperand(2))) + return NULL; + + return CE; +} + +/// createGlobalVariableDIE - create global variable DIE. +void CompileUnit::createGlobalVariableDIE(const MDNode *N) { + // Check for pre-existence. + if (getDIE(N)) + return; + + DIGlobalVariable GV(N); + if (!GV.Verify()) + return; + + DIDescriptor GVContext = GV.getContext(); + DIType GTy = GV.getType(); + + // If this is a static data member definition, some attributes belong + // to the declaration DIE. + DIE *VariableDIE = NULL; + bool IsStaticMember = false; + DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration(); + if (SDMDecl.Verify()) { + assert(SDMDecl.isStaticMember() && "Expected static member decl"); + // We need the declaration DIE that is in the static member's class. + // But that class might not exist in the DWARF yet. + // Creating the class will create the static member decl DIE. + getOrCreateContextDIE(SDMDecl.getContext()); + VariableDIE = getDIE(SDMDecl); + assert(VariableDIE && "Static member decl has no context?"); + IsStaticMember = true; + } + + // If this is not a static data member definition, create the variable + // DIE and add the initial set of attributes to it. + if (!VariableDIE) { + VariableDIE = new DIE(GV.getTag()); + // Add to map. + insertDIE(N, VariableDIE); + + // Add name and type. + addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); + addType(VariableDIE, GTy); + + // Add scoping info. + if (!GV.isLocalToUnit()) { + addFlag(VariableDIE, dwarf::DW_AT_external); + addGlobalName(GV.getName(), VariableDIE); + } + + // Add line number info. + addSourceLine(VariableDIE, GV); + // Add to context owner. + addToContextOwner(VariableDIE, GVContext); + } + + // Add location. + bool addToAccelTable = false; + DIE *VariableSpecDIE = NULL; + bool isGlobalVariable = GV.getGlobal() != NULL; + if (isGlobalVariable) { + addToAccelTable = true; + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal())); + // Do not create specification DIE if context is either compile unit + // or a subprogram. + if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && + !GVContext.isFile() && !isSubprogramContext(GVContext)) { + // Create specification DIE. + VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); + addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, + dwarf::DW_FORM_ref4, VariableDIE); + addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + // A static member's declaration is already flagged as such. + if (!SDMDecl.Verify()) + addFlag(VariableDIE, dwarf::DW_AT_declaration); + addDie(VariableSpecDIE); + } else { + addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + } + // Add linkage name. 
+    StringRef LinkageName = GV.getLinkageName();
+    if (!LinkageName.empty()) {
+      // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
+      // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
+      // TAG_variable.
+      addString(IsStaticMember && VariableSpecDIE ?
+                VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+                getRealLinkageName(LinkageName));
+      // In compatibility mode with older gdbs we put the linkage name on both
+      // the TAG_variable DIE and on the TAG_member DIE.
+      if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat())
+        addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+                  getRealLinkageName(LinkageName));
+    }
+  } else if (const ConstantInt *CI =
+             dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+    // AT_const_value was added when the static member was created. To avoid
+    // emitting AT_const_value multiple times, we only add AT_const_value when
+    // it is not a static member.
+    if (!IsStaticMember)
+      addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
+  } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+    addToAccelTable = true;
+    // GV is a merged global.
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    Value *Ptr = CE->getOperand(0);
+    addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
+    addUInt(Block, 0, dwarf::DW_FORM_udata,
+            Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+    addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+  }
+
+  if (addToAccelTable) {
+    DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE;
+    addAccelName(GV.getName(), AddrDIE);
+
+    // If the linkage name is different from the name, go ahead and output
+    // that as well into the name table.
+    if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
+      addAccelName(GV.getLinkageName(), AddrDIE);
+  }
+
+  return;
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
+                                       DIE *IndexTy) {
+  DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+  addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+
+  // The LowerBound value defines the lower bound, which is typically zero
+  // for C/C++. The Count value is the number of elements. Values are 64 bit.
+  // If Count == -1 then the array is unbounded and we do not emit
+  // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0
+  // and Count == 0, then the array has zero elements in which case we do not
+  // emit an upper bound.
+  int64_t LowerBound = SR.getLo();
+  int64_t DefaultLowerBound = getDefaultLowerBound();
+  int64_t Count = SR.getCount();
+
+  if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
+    addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound);
+
+  if (Count != -1 && Count != 0)
+    // FIXME: An unbounded array should reference the expression that defines
+    // the array.
+    addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1);
+
+  Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
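+/// For example, 'int a[10]' becomes a DW_TAG_array_type whose element type
+/// is 'int', with one DW_TAG_subrange_type child whose DW_AT_upper_bound is
+/// 9 (LowerBound + Count - 1, with the default C/C++ lower bound of 0 left
+/// implicit).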
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer, + DICompositeType *CTy) { + Buffer.setTag(dwarf::DW_TAG_array_type); + if (CTy->isVector()) + addFlag(&Buffer, dwarf::DW_AT_GNU_vector); + + // Emit derived type. + addType(&Buffer, CTy->getTypeDerivedFrom()); + DIArray Elements = CTy->getTypeArray(); + + // Get an anonymous type for index type. + // FIXME: This type should be passed down from the front end + // as different languages may have different sizes for indexes. + DIE *IdxTy = getIndexTyDie(); + if (!IdxTy) { + // Construct an anonymous type for index type. + IdxTy = new DIE(dwarf::DW_TAG_base_type); + addString(IdxTy, dwarf::DW_AT_name, "int"); + addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); + addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_signed); + addDie(IdxTy); + setIndexTyDie(IdxTy); + } + + // Add subranges to array type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + if (Element.getTag() == dwarf::DW_TAG_subrange_type) + constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); + } +} + +/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. +DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { + DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); + StringRef Name = ETy.getName(); + addString(Enumerator, dwarf::DW_AT_name, Name); + int64_t Value = ETy.getEnumValue(); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + return Enumerator; +} + +/// constructContainingTypeDIEs - Construct DIEs for types that contain +/// vtables. +void CompileUnit::constructContainingTypeDIEs() { + for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), + CE = ContainingTypeMap.end(); CI != CE; ++CI) { + DIE *SPDie = CI->first; + const MDNode *N = CI->second; + if (!N) continue; + DIE *NDie = getDIE(N); + if (!NDie) continue; + addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + } +} + +/// constructVariableDIE - Construct a DIE for the given DbgVariable. +DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { + StringRef Name = DV->getName(); + + // Translate tag to proper Dwarf tag. + unsigned Tag = DV->getTag(); + + // Define variable debug information entry. + DIE *VariableDie = new DIE(Tag); + DbgVariable *AbsVar = DV->getAbstractVariable(); + DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL; + if (AbsDIE) + addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsDIE); + else { + addString(VariableDie, dwarf::DW_AT_name, Name); + addSourceLine(VariableDie, DV->getVariable()); + addType(VariableDie, DV->getType()); + } + + if (DV->isArtificial()) + addFlag(VariableDie, dwarf::DW_AT_artificial); + + if (isScopeAbstract) { + DV->setDIE(VariableDie); + return VariableDie; + } + + // Add variable address. + + unsigned Offset = DV->getDotDebugLocOffset(); + if (Offset != ~0U) { + addLabel(VariableDie, dwarf::DW_AT_location, + dwarf::DW_FORM_data4, + Asm->GetTempSymbol("debug_loc", Offset)); + DV->setDIE(VariableDie); + return VariableDie; + } + + // Check if variable is described by a DBG_VALUE instruction. 
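+  // A three-operand DBG_VALUE is decoded case by case below: a register
+  // operand (optionally frame register plus immediate offset) becomes a
+  // MachineLocation, while immediate, floating-point and constant-integer
+  // operands become DW_AT_const_value entries; any other operand count is
+  // handed to getDebugValueLocation as a target-specific form.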
+  if (const MachineInstr *DVInsn = DV->getMInsn()) {
+    bool updated = false;
+    if (DVInsn->getNumOperands() == 3) {
+      if (DVInsn->getOperand(0).isReg()) {
+        const MachineOperand &RegOp = DVInsn->getOperand(0);
+        const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+        if (DVInsn->getOperand(1).isImm() &&
+            TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+          unsigned FrameReg = 0;
+          const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+          int Offset =
+              TFI->getFrameIndexReference(*Asm->MF,
+                                          DVInsn->getOperand(1).getImm(),
+                                          FrameReg);
+          MachineLocation Location(FrameReg, Offset);
+          addVariableAddress(DV, VariableDie, Location);
+
+        } else if (RegOp.getReg())
+          addVariableAddress(DV, VariableDie,
+                             MachineLocation(RegOp.getReg()));
+        updated = true;
+      }
+      else if (DVInsn->getOperand(0).isImm())
+        updated =
+            addConstantValue(VariableDie, DVInsn->getOperand(0),
+                             DV->getType());
+      else if (DVInsn->getOperand(0).isFPImm())
+        updated =
+            addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+      else if (DVInsn->getOperand(0).isCImm())
+        updated =
+            addConstantValue(VariableDie,
+                             DVInsn->getOperand(0).getCImm(),
+                             DV->getType().isUnsignedDIType());
+    } else {
+      addVariableAddress(DV, VariableDie,
+                         Asm->getDebugValueLocation(DVInsn));
+      updated = true;
+    }
+    if (!updated) {
+      // If VariableDie was not updated then the DBG_VALUE instruction does
+      // not have valid variable info.
+      delete VariableDie;
+      return NULL;
+    }
+    DV->setDIE(VariableDie);
+    return VariableDie;
+  } else {
+    // ... else use a frame index.
+    int FI = DV->getFrameIndex();
+    if (FI != ~0) {
+      unsigned FrameReg = 0;
+      const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+      int Offset =
+          TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+      MachineLocation Location(FrameReg, Offset);
+      addVariableAddress(DV, VariableDie, Location);
+    }
+  }
+
+  DV->setDIE(VariableDie);
+  return VariableDie;
+}
+
+/// createMemberDIE - Create new member DIE.
+DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
+  DIE *MemberDie = new DIE(DT.getTag());
+  StringRef Name = DT.getName();
+  if (!Name.empty())
+    addString(MemberDie, dwarf::DW_AT_name, Name);
+
+  addType(MemberDie, DT.getTypeDerivedFrom());
+
+  addSourceLine(MemberDie, DT);
+
+  DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+  addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+  uint64_t Size = DT.getSizeInBits();
+  uint64_t FieldSize = DT.getOriginalTypeSize();
+
+  if (Size != FieldSize) {
+    // Handle bitfield.
+    addUInt(MemberDie, dwarf::DW_AT_byte_size, 0,
+            DT.getOriginalTypeSize() >> 3);
+    addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+    uint64_t Offset = DT.getOffsetInBits();
+    uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+    uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+    uint64_t FieldOffset = (HiMark - FieldSize);
+    Offset -= FieldOffset;
+
+    // Maybe we need to work from the other end.
+    if (Asm->getDataLayout().isLittleEndian())
+      Offset = FieldSize - (Offset + Size);
+    addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+    // Here DW_AT_data_member_location points to the anonymous
+    // field that includes this bit field.
+    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+  } else
+    // This is not a bitfield.
+    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata,
+            DT.getOffsetInBits() >> 3);
+
+  if (DT.getTag() == dwarf::DW_TAG_inheritance
+      && DT.isVirtual()) {
+
+    // For C++, virtual base classes are not at a fixed offset. Use the
+    // following expression to extract the appropriate offset from the
+    // vtable:
+    //   BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+    DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+    addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+             VBaseLocationDie);
+  } else
+    addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+  if (DT.isProtected())
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_protected);
+  else if (DT.isPrivate())
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_private);
+  // Otherwise C++ member and base classes are considered public.
+  else
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_public);
+  if (DT.isVirtual())
+    addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
+            dwarf::DW_VIRTUALITY_virtual);
+
+  // Objective-C properties.
+  if (MDNode *PNode = DT.getObjCProperty())
+    if (DIEEntry *PropertyDie = getDIEEntry(PNode))
+      MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+                          PropertyDie);
+
+  if (DT.isArtificial())
+    addFlag(MemberDie, dwarf::DW_AT_artificial);
+
+  return MemberDie;
+}
+
+/// createStaticMemberDIE - Create new DIE for C++ static member.
+DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) {
+  if (!DT.Verify())
+    return NULL;
+
+  DIE *StaticMemberDIE = new DIE(DT.getTag());
+  DIType Ty = DT.getTypeDerivedFrom();
+
+  addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName());
+  addType(StaticMemberDIE, Ty);
+  addSourceLine(StaticMemberDIE, DT);
+  addFlag(StaticMemberDIE, dwarf::DW_AT_external);
+  addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
+
+  // FIXME: We could omit private if the parent is a class_type, and
+  // public if the parent is something else.
+  if (DT.isProtected())
+    addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_protected);
+  else if (DT.isPrivate())
+    addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_private);
+  else
+    addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_public);
+
+  if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
+    addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType());
+  if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
+    addConstantFPValue(StaticMemberDIE, CFP);
+
+  insertDIE(DT, StaticMemberDIE);
+  return StaticMemberDIE;
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 0000000..2b180c6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,383 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo.h"
+
+namespace llvm {
+
+class DwarfDebug;
+class DwarfUnits;
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class ConstantFP;
+class DbgVariable;
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+  /// UniqueID - a numeric ID unique among all CUs in the module
+  ///
+  unsigned UniqueID;
+
+  /// Language - The DW_AT_language of the compile unit
+  ///
+  unsigned Language;
+
+  /// CUDie - Compile unit debug information entry.
+  ///
+  const OwningPtr<DIE> CUDie;
+
+  /// Asm - Target of Dwarf emission.
+  AsmPrinter *Asm;
+
+  // Holders for some common dwarf information.
+  DwarfDebug *DD;
+  DwarfUnits *DU;
+
+  /// IndexTyDie - An anonymous type for index type. Owned by CUDie.
+  DIE *IndexTyDie;
+
+  /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+  /// variables to debug information entries.
+  DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+  /// descriptors to debug information entries using a DIEEntry proxy.
+  DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+  /// GlobalNames - A map of globally visible named entities for this unit.
+  ///
+  StringMap<DIE*> GlobalNames;
+
+  /// GlobalTypes - A map of globally visible types for this unit.
+  ///
+  StringMap<DIE*> GlobalTypes;
+
+  /// AccelNames - A map of names for the name accelerator table.
+  ///
+  StringMap<std::vector<DIE*> > AccelNames;
+  StringMap<std::vector<DIE*> > AccelObjC;
+  StringMap<std::vector<DIE*> > AccelNamespace;
+  StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes;
+
+  /// DIEBlocks - A list of all the DIEBlocks in use.
+  std::vector<DIEBlock *> DIEBlocks;
+
+  /// ContainingTypeMap - This map is used to keep track of subprogram DIEs
+  /// that need DW_AT_containing_type attribute. This attribute points to a
+  /// DIE that corresponds to the MDNode mapped with the subprogram DIE.
+  DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
+  /// Offset of the CUDie from beginning of debug info section.
+  unsigned DebugInfoOffset;
+
+  /// getDefaultLowerBound - Return the default lower bound for an array. If
+  /// the DWARF version doesn't handle the language, return -1.
+  int64_t getDefaultLowerBound() const;
+
+  /// getOrCreateContextDIE - Get context owner's DIE.
+  DIE *getOrCreateContextDIE(DIDescriptor Context);
+
+public:
+  CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
+              DwarfUnits *);
+  ~CompileUnit();
+
+  // Accessors.
+  unsigned getUniqueID() const { return UniqueID; }
+  unsigned getLanguage() const { return Language; }
+  DIE* getCUDie() const { return CUDie.get(); }
+  unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
+  const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
+  const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+  const StringMap<std::vector<DIE*> > &getAccelNames() const {
+    return AccelNames;
+  }
+  const StringMap<std::vector<DIE*> > &getAccelObjC() const {
+    return AccelObjC;
+  }
+  const StringMap<std::vector<DIE*> > &getAccelNamespace() const {
+    return AccelNamespace;
+  }
+  const StringMap<std::vector<std::pair<DIE*, unsigned > > >
+  &getAccelTypes() const {
+    return AccelTypes;
+  }
+
+  void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
+  /// hasContent - Return true if this compile unit has something to write out.
+  ///
+  bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+  /// addGlobalName - Add a new global entity to the compile unit.
+  ///
+  void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; }
+
+  /// addGlobalType - Add a new global type to the compile unit.
+  ///
+  void addGlobalType(DIType Ty);
+
+
+  /// addAccelName - Add a new name to the name accelerator table.
+  void addAccelName(StringRef Name, DIE *Die) {
+    std::vector<DIE*> &DIEs = AccelNames[Name];
+    DIEs.push_back(Die);
+  }
+  void addAccelObjC(StringRef Name, DIE *Die) {
+    std::vector<DIE*> &DIEs = AccelObjC[Name];
+    DIEs.push_back(Die);
+  }
+  void addAccelNamespace(StringRef Name, DIE *Die) {
+    std::vector<DIE*> &DIEs = AccelNamespace[Name];
+    DIEs.push_back(Die);
+  }
+  void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) {
+    std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name];
+    DIEs.push_back(Die);
+  }
+
+  /// getDIE - Returns the debug information entry map slot for the
+  /// specified debug variable.
+  DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+  DIEBlock *getDIEBlock() {
+    return new (DIEValueAllocator) DIEBlock();
+  }
+
+  /// insertDIE - Insert DIE into the map.
+  void insertDIE(const MDNode *N, DIE *D) {
+    MDNodeToDieMap.insert(std::make_pair(N, D));
+  }
+
+  /// getDIEEntry - Returns the debug information entry for the specified
+  /// debug variable.
+  DIEEntry *getDIEEntry(const MDNode *N) {
+    DenseMap<const MDNode *, DIEEntry *>::iterator I =
+      MDNodeToDIEEntryMap.find(N);
+    if (I == MDNodeToDIEEntryMap.end())
+      return NULL;
+    return I->second;
+  }
+
+  /// insertDIEEntry - Insert debug information entry into the map.
+  void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+    MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+  }
+
+  /// addDie - Adds or interns the DIE to the compile unit.
+  ///
+  void addDie(DIE *Buffer) {
+    this->CUDie->addChild(Buffer);
+  }
+
+  // getIndexTyDie - Get an anonymous type for index type.
+  DIE *getIndexTyDie() {
+    return IndexTyDie;
+  }
+
+  // setIndexTyDie - Set D as anonymous type for index which can be reused
+  // later.
+  void setIndexTyDie(DIE *D) {
+    IndexTyDie = D;
+  }
+
+  /// addFlag - Add a flag that is true to the DIE.
+  void addFlag(DIE *Die, unsigned Attribute);
+
+  /// addUInt - Add an unsigned integer attribute data and value.
+  ///
+  void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+  /// addSInt - Add a signed integer attribute data and value.
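+/// (DW_FORM_sdata values are SLEB128-encoded, so small negative constants
+/// stay compact.)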
+ /// + void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); + + /// addString - Add a string attribute data and value. + /// + void addString(DIE *Die, unsigned Attribute, const StringRef Str); + + /// addLocalString - Add a string attribute data and value. + /// + void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str); + + /// addLabel - Add a Dwarf label attribute data and value. + /// + void addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label); + + /// addLabelAddress - Add a dwarf label attribute data and value using + /// either DW_FORM_addr or DW_FORM_GNU_addr_index. + /// + void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label); + + /// addOpAddress - Add a dwarf op address data and value using the + /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. + /// + void addOpAddress(DIE *Die, MCSymbol *Label); + + /// addDelta - Add a label delta attribute data and value. + /// + void addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo); + + /// addDIEEntry - Add a DIE attribute data and value. + /// + void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); + + /// addBlock - Add block data. + /// + void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); + + /// addSourceLine - Add location information to specified debug information + /// entry. + void addSourceLine(DIE *Die, DIVariable V); + void addSourceLine(DIE *Die, DIGlobalVariable G); + void addSourceLine(DIE *Die, DISubprogram SP); + void addSourceLine(DIE *Die, DIType Ty); + void addSourceLine(DIE *Die, DINameSpace NS); + void addSourceLine(DIE *Die, DIObjCProperty Ty); + + /// addAddress - Add an address attribute to a die based on the location + /// provided. + void addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addConstantValue - Add constant value entry in variable DIE. + bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); + bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); + bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); + + /// addConstantFPValue - Add constant value entry in variable DIE. + bool addConstantFPValue(DIE *Die, const MachineOperand &MO); + bool addConstantFPValue(DIE *Die, const ConstantFP *CFP); + + /// addTemplateParams - Add template parameters in buffer. + void addTemplateParams(DIE &Buffer, DIArray TParams); + + /// addRegisterOp - Add register operand. + void addRegisterOp(DIE *TheDie, unsigned Reg); + + /// addRegisterOffset - Add register offset. + void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset); + + /// addComplexAddress - Start with the address based on the location provided, + /// and generate the DWARF information necessary to find the actual variable + /// (navigating the extra location information encoded in the type) based on + /// the starting location. Add the DWARF information to the die. + /// + void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + // FIXME: Should be reformulated in terms of addComplexAddress. + /// addBlockByrefAddress - Start with the address based on the location + /// provided, and generate the DWARF information necessary to find the + /// actual Block variable (navigating the Block struct) based on the + /// starting location. Add the DWARF information to the die. Obsolete, + /// please use addComplexAddress instead. 
+  ///
+  void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+                            const MachineLocation &Location);
+
+  /// addVariableAddress - Add DW_AT_location attribute for a
+  /// DbgVariable based on provided MachineLocation.
+  void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location);
+
+  /// addToContextOwner - Add Die into the list of its context owner's children.
+  void addToContextOwner(DIE *Die, DIDescriptor Context);
+
+  /// addType - Add a new type attribute to the specified entity. This takes
+  /// an attribute parameter because DW_AT_friend attributes are also
+  /// type references.
+  void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type);
+
+  /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+  DIE *getOrCreateNameSpace(DINameSpace NS);
+
+  /// getOrCreateSubprogramDIE - Create new DIE using SP.
+  DIE *getOrCreateSubprogramDIE(DISubprogram SP);
+
+  /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+  /// given DIType.
+  DIE *getOrCreateTypeDIE(const MDNode *N);
+
+  /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+  /// for the given DITemplateTypeParameter.
+  DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+  /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create
+  /// new DIE for the given DITemplateValueParameter.
+  DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
+  /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+  /// information entry.
+  DIEEntry *createDIEEntry(DIE *Entry);
+
+  /// createGlobalVariableDIE - create global variable DIE.
+  void createGlobalVariableDIE(const MDNode *N);
+
+  void addPubTypes(DISubprogram SP);
+
+  /// constructTypeDIE - Construct basic type die from DIBasicType.
+  void constructTypeDIE(DIE &Buffer,
+                        DIBasicType BTy);
+
+  /// constructTypeDIE - Construct derived type die from DIDerivedType.
+  void constructTypeDIE(DIE &Buffer,
+                        DIDerivedType DTy);
+
+  /// constructTypeDIE - Construct type DIE from DICompositeType.
+  void constructTypeDIE(DIE &Buffer,
+                        DICompositeType CTy);
+
+  /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+  void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+  /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+  void constructArrayTypeDIE(DIE &Buffer,
+                             DICompositeType *CTy);
+
+  /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+  DIE *constructEnumTypeDIE(DIEnumerator ETy);
+
+  /// constructContainingTypeDIEs - Construct DIEs for types that contain
+  /// vtables.
+  void constructContainingTypeDIEs();
+
+  /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+  DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract);
+
+  /// createMemberDIE - Create new member DIE.
+  DIE *createMemberDIE(DIDerivedType DT);
+
+  /// createStaticMemberDIE - Create new static data member DIE.
+  DIE *createStaticMemberDIE(DIDerivedType DT);
+
+private:
+
+  // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator; + DIEInteger *DIEIntegerOne; +}; + +} // end llvm namespace +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp new file mode 100644 index 0000000..d3cb4f9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -0,0 +1,2570 @@ +//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" +#include "DwarfDebug.h" +#include "DIE.h" +#include "DwarfAccelTable.h" +#include "DwarfCompileUnit.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", + cl::Hidden, + cl::desc("Disable debug info printing")); + +static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, + cl::desc("Make an absence of debug location information explicit."), + cl::init(false)); + +static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames", + cl::Hidden, cl::init(false), + cl::desc("Generate DWARF pubnames section")); + +namespace { + enum DefaultOnOff { + Default, Enable, Disable + }; +} + +static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::values( + clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), + clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, + cl::desc("Compatibility with Darwin gdb."), + cl::values( + clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), + clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, + cl::desc("Output prototype dwarf split debug info."), + cl::values( + clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), + clEnumValEnd), + cl::init(Default)); + +namespace { + const char *DWARFGroupName = "DWARF Emission"; + const char *DbgTimerName = "DWARF 
Debug Writer"; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// + +// Configuration values for initial hash set sizes (log2). +// +static const unsigned InitAbbreviationsSetSize = 9; // log2(512) + +namespace llvm { + +DIType DbgVariable::getType() const { + DIType Ty = Var.getType(); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. + if (Var.isBlockByrefVariable()) { + /* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. + + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function addBlockByrefType does this. */ + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType); + DIArray Elements = blockStruct.getTypeArray(); + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + if (getName() == DT.getName()) + return (DT.getTypeDerivedFrom()); + } + } + return Ty; +} + +} // end llvm namespace + +DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) + : Asm(A), MMI(Asm->MMI), FirstCU(0), + AbbreviationsSet(InitAbbreviationsSetSize), + SourceIdMap(DIEValueAllocator), + PrevLabel(NULL), GlobalCUIndexCount(0), + InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string", + DIEValueAllocator), + SkeletonAbbrevSet(InitAbbreviationsSetSize), + SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string", + DIEValueAllocator) { + + DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; + DwarfStrSectionSym = TextSectionSym = 0; + DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0; + DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; + FunctionBeginSym = FunctionEndSym = 0; + + // Turn on accelerator tables and older gdb compatibility + // for Darwin. + bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin(); + if (DarwinGDBCompat == Default) { + if (IsDarwin) + IsDarwinGDBCompat = true; + else + IsDarwinGDBCompat = false; + } else + IsDarwinGDBCompat = DarwinGDBCompat == Enable ? 
true : false;
+
+  if (DwarfAccelTables == Default) {
+    if (IsDarwin)
+      HasDwarfAccelTables = true;
+    else
+      HasDwarfAccelTables = false;
+  } else
+    HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+
+  if (SplitDwarf == Default)
+    HasSplitDwarf = false;
+  else
+    HasSplitDwarf = SplitDwarf == Enable ? true : false;
+
+  {
+    NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+    beginModule();
+  }
+}
+DwarfDebug::~DwarfDebug() {
+}
+
+// Switch to the specified MCSection and emit an assembler
+// temporary label to it if SymbolStem is specified.
+static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+                                const char *SymbolStem = 0) {
+  Asm->OutStreamer.SwitchSection(Section);
+  if (!SymbolStem) return 0;
+
+  MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+  Asm->OutStreamer.EmitLabel(TmpSym);
+  return TmpSym;
+}
+
+MCSymbol *DwarfUnits::getStringPoolSym() {
+  return Asm->GetTempSymbol(StringPref);
+}
+
+MCSymbol *DwarfUnits::getStringPoolEntry(StringRef Str) {
+  std::pair<MCSymbol*, unsigned> &Entry =
+    StringPool.GetOrCreateValue(Str).getValue();
+  if (Entry.first) return Entry.first;
+
+  Entry.second = NextStringPoolNumber++;
+  return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
+}
+
+unsigned DwarfUnits::getStringPoolIndex(StringRef Str) {
+  std::pair<MCSymbol*, unsigned> &Entry =
+    StringPool.GetOrCreateValue(Str).getValue();
+  if (Entry.first) return Entry.second;
+
+  Entry.second = NextStringPoolNumber++;
+  Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
+  return Entry.second;
+}
+
+unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) {
+  std::pair<MCSymbol*, unsigned> &Entry = AddressPool[Sym];
+  if (Entry.first) return Entry.second;
+
+  Entry.second = NextAddrPoolNumber++;
+  Entry.first = Sym;
+  return Entry.second;
+}
+
+// Define a unique number for the abbreviation.
+//
+void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+  // Profile the node so that we can make it unique.
+  FoldingSetNodeID ID;
+  Abbrev.Profile(ID);
+
+  // Check the set for priors.
+  DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev);
+
+  // If it's newly added.
+  if (InSet == &Abbrev) {
+    // Add to abbreviation list.
+    Abbreviations->push_back(&Abbrev);
+
+    // Assign the vector position + 1 as its number.
+    Abbrev.setNumber(Abbreviations->size());
+  } else {
+    // Assign existing abbreviation number.
+    Abbrev.setNumber(InSet->getNumber());
+  }
+}
+
+// If the linkage name carries the special LLVM prefix that tells the asm
+// printer not to emit the usual symbol prefix before the symbol name, return
+// the linkage name with that special prefix skipped.
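+//
+// For illustration (hypothetical name): a linkage name stored as
+// "\1_ZN3foo3barEv" is returned as "_ZN3foo3barEv", while a name without the
+// '\1' marker passes through unchanged.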
+static StringRef getRealLinkageName(StringRef LinkageName) {
+  char One = '\1';
+  if (LinkageName.startswith(StringRef(&One, 1)))
+    return LinkageName.substr(1);
+  return LinkageName;
+}
+
+static bool isObjCClass(StringRef Name) {
+  return Name.startswith("+") || Name.startswith("-");
+}
+
+static bool hasObjCCategory(StringRef Name) {
+  if (!isObjCClass(Name)) return false;
+
+  size_t pos = Name.find(')');
+  if (pos != std::string::npos) {
+    if (Name[pos+1] != ' ') return false;
+    return true;
+  }
+  return false;
+}
+
+static void getObjCClassCategory(StringRef In, StringRef &Class,
+                                 StringRef &Category) {
+  if (!hasObjCCategory(In)) {
+    Class = In.slice(In.find('[') + 1, In.find(' '));
+    Category = "";
+    return;
+  }
+
+  Class = In.slice(In.find('[') + 1, In.find('('));
+  Category = In.slice(In.find('[') + 1, In.find(' '));
+  return;
+}
+
+static StringRef getObjCMethodName(StringRef In) {
+  return In.slice(In.find(' ') + 1, In.find(']'));
+}
+
+// Add the various names to the Dwarf accelerator tables.
+static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
+                               DIE* Die) {
+  if (!SP.isDefinition()) return;
+
+  TheCU->addAccelName(SP.getName(), Die);
+
+  // If the linkage name is different than the name, go ahead and output
+  // that as well into the name table.
+  if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
+    TheCU->addAccelName(SP.getLinkageName(), Die);
+
+  // If this is an Objective-C selector name add it to the ObjC accelerator
+  // too.
+  if (isObjCClass(SP.getName())) {
+    StringRef Class, Category;
+    getObjCClassCategory(SP.getName(), Class, Category);
+    TheCU->addAccelObjC(Class, Die);
+    if (Category != "")
+      TheCU->addAccelObjC(Category, Die);
+    // Also add the base method name to the name table.
+    TheCU->addAccelName(getObjCMethodName(SP.getName()), Die);
+  }
+}
+
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
+                                          const MDNode *SPNode) {
+  DIE *SPDie = SPCU->getDIE(SPNode);
+
+  assert(SPDie && "Unable to find subprogram DIE!");
+  DISubprogram SP(SPNode);
+
+  // If we're updating an abstract DIE, then we will be adding the children and
+  // object pointer later on. But what we don't want to do is process the
+  // concrete DIE twice.
+  DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode);
+  if (AbsSPDIE) {
+    bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie());
+    // Pick up abstract subprogram DIE.
+    SPDie = new DIE(dwarf::DW_TAG_subprogram);
+    // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of
+    // DW_FORM_ref4.
+    SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+                      InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+                      AbsSPDIE);
+    SPCU->addDie(SPDie);
+  } else {
+    DISubprogram SPDecl = SP.getFunctionDeclaration();
+    if (!SPDecl.isSubprogram()) {
+      // There is no need to generate a specification DIE for a function
+      // defined at compile unit level. If a function is defined inside
+      // another function then gdb prefers the definition at top level and
+      // does not expect a specification DIE in the parent function. So avoid
+      // creating a specification DIE for a function defined inside a
+      // function.
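+      //
+      // For illustration (hypothetical code): for a member function such as
+      // "void C::m()", whose context is the class C rather than the compile
+      // unit or a file, the branch below adds a declaration DIE plus a
+      // top-level definition DIE that refers back to it through
+      // DW_AT_specification.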
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() && + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext())) { + SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); + + // Add arguments. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(Args.getElement(i)); + SPCU->addType(Arg, ATy); + if (ATy.isArtificial()) + SPCU->addFlag(Arg, dwarf::DW_AT_artificial); + if (ATy.isObjectPointer()) + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, + dwarf::DW_FORM_ref4, Arg); + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, + dwarf::DW_FORM_ref4, SPDeclDie); + SPCU->addDie(SPDie); + } + } + } + + SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc, + Asm->GetTempSymbol("func_begin", + Asm->getFunctionNumber())); + SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc, + Asm->GetTempSymbol("func_end", + Asm->getFunctionNumber())); + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + MachineLocation Location(RI->getFrameRegister(*Asm->MF)); + SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_subprogram nodes. + addSubprogramNames(SPCU, SP, SPDie); + + return SPDie; +} + +// Construct new DW_TAG_lexical_block for this scope and attach +// DW_AT_low_pc/DW_AT_high_pc labels. +DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, + LexicalScope *Scope) { + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); + if (Scope->isAbstractScope()) + return ScopeDIE; + + const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges(); + if (Ranges.empty()) + return 0; + + SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(); + if (Ranges.size() > 1) { + // .debug_range section has not been laid out yet. Emit offset in + // .debug_range as a uint, size 4, for now. emitDIE will handle + // DW_AT_ranges appropriately. + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() + * Asm->getDataLayout().getPointerSize()); + for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), + RE = Ranges.end(); RI != RE; ++RI) { + DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); + DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); + } + DebugRangeSymbols.push_back(NULL); + DebugRangeSymbols.push_back(NULL); + return ScopeDIE; + } + + MCSymbol *Start = getLabelBeforeInsn(RI->first); + MCSymbol *End = getLabelAfterInsn(RI->second); + + if (End == 0) return 0; + + assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); + assert(End->isDefined() && "Invalid end label for an inlined scope!"); + + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End); + + return ScopeDIE; +} + +// This scope represents inlined body of a function. Construct DIE to +// represent this concrete inlined copy of the function. 
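+// For illustration (hypothetical functions): when foo() is inlined into
+// bar(), the DW_TAG_inlined_subroutine DIE built here points back at foo's
+// abstract DIE through DW_AT_abstract_origin and records the call site with
+// DW_AT_call_file/DW_AT_call_line.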
+DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
+                                          LexicalScope *Scope) {
+  const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
+  assert(!Ranges.empty() &&
+         "LexicalScope does not have instruction markers!");
+
+  if (!Scope->getScopeNode())
+    return NULL;
+  DIScope DS(Scope->getScopeNode());
+  DISubprogram InlinedSP = getDISubprogram(DS);
+  DIE *OriginDIE = TheCU->getDIE(InlinedSP);
+  if (!OriginDIE) {
+    DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram.");
+    return NULL;
+  }
+
+  SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
+  MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+  MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+
+  if (StartLabel == 0 || EndLabel == 0) {
+    llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
+  }
+  assert(StartLabel->isDefined() &&
+         "Invalid starting label for an inlined scope!");
+  assert(EndLabel->isDefined() &&
+         "Invalid end label for an inlined scope!");
+
+  DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
+  TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+                     dwarf::DW_FORM_ref4, OriginDIE);
+
+  if (Ranges.size() > 1) {
+    // .debug_range section has not been laid out yet. Emit offset in
+    // .debug_range as a uint, size 4, for now. emitDIE will handle
+    // DW_AT_ranges appropriately.
+    TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+                   DebugRangeSymbols.size()
+                   * Asm->getDataLayout().getPointerSize());
+    for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+         RE = Ranges.end(); RI != RE; ++RI) {
+      DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+      DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+    }
+    DebugRangeSymbols.push_back(NULL);
+    DebugRangeSymbols.push_back(NULL);
+  } else {
+    TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel);
+    TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel);
+  }
+
+  InlinedSubprogramDIEs.insert(OriginDIE);
+
+  // Track the start label for this inlined function.
+  //
+  // The .debug_inlined section specification does not clearly state how to
+  // emit an inlined scope that is split into multiple instruction ranges.
+  // For now, use the first instruction range, emit the low_pc/high_pc pair,
+  // and emit the corresponding .debug_inlined section entry for this pair.
+  DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+    I = InlineInfo.find(InlinedSP);
+
+  if (I == InlineInfo.end()) {
+    InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
+    InlinedSPNodes.push_back(InlinedSP);
+  } else
+    I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
+
+  DILocation DL(Scope->getInlinedAt());
+  TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
+                 getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
+                                     TheCU->getUniqueID()));
+  TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+  // Add name to the name table; we do this here because we're guaranteed
+  // to have concrete versions of our DW_TAG_inlined_subroutine nodes.
+  addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
+
+  return ScopeDIE;
+}
+
+// Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
+  if (!Scope || !Scope->getScopeNode())
+    return NULL;
+
+  DIScope DS(Scope->getScopeNode());
+  // Early return to avoid creating dangling variable|scope DIEs.
+ if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() && + !TheCU->getDIE(DS)) + return NULL; + + SmallVector<DIE *, 8> Children; + DIE *ObjectPointer = NULL; + + // Collect arguments for current function. + if (LScopes.isCurrentFunctionScope(Scope)) + for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) + if (DbgVariable *ArgDV = CurrentFnArguments[i]) + if (DIE *Arg = + TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) { + Children.push_back(Arg); + if (ArgDV->isObjectPointer()) ObjectPointer = Arg; + } + + // Collect lexical scope children first. + const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope); + for (unsigned i = 0, N = Variables.size(); i < N; ++i) + if (DIE *Variable = + TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { + Children.push_back(Variable); + if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; + } + const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren(); + for (unsigned j = 0, M = Scopes.size(); j < M; ++j) + if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) + Children.push_back(Nested); + DIE *ScopeDIE = NULL; + if (Scope->getInlinedAt()) + ScopeDIE = constructInlinedScopeDIE(TheCU, Scope); + else if (DS.isSubprogram()) { + ProcessedSPNodes.insert(DS); + if (Scope->isAbstractScope()) { + ScopeDIE = TheCU->getDIE(DS); + // Note down abstract DIE. + if (ScopeDIE) + AbstractSPDies.insert(std::make_pair(DS, ScopeDIE)); + } + else + ScopeDIE = updateSubprogramScopeDIE(TheCU, DS); + } + else { + // There is no need to emit empty lexical block DIE. + if (Children.empty()) + return NULL; + ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); + } + + if (!ScopeDIE) return NULL; + + // Add children + for (SmallVector<DIE *, 8>::iterator I = Children.begin(), + E = Children.end(); I != E; ++I) + ScopeDIE->addChild(*I); + + if (DS.isSubprogram() && ObjectPointer != NULL) + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, + dwarf::DW_FORM_ref4, ObjectPointer); + + if (DS.isSubprogram()) + TheCU->addPubTypes(DISubprogram(DS)); + + return ScopeDIE; +} + +// Look up the source id with the given directory and source file names. +// If none currently exists, create a new id and insert it in the +// SourceIds map. This can update DirectoryNames and SourceFileNames maps +// as well. +unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, + StringRef DirName, unsigned CUID) { + // If we use .loc in assembly, we can't separate .file entries according to + // compile units. Thus all files will belong to the default compile unit. + if (Asm->TM.hasMCUseLoc() && + Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + CUID = 0; + + // If FE did not provide a file name, then assume stdin. + if (FileName.empty()) + return getOrCreateSourceID("<stdin>", StringRef(), CUID); + + // TODO: this might not belong here. See if we can factor this better. + if (DirName == CompilationDir) + DirName = ""; + + // FileIDCUMap stores the current ID for the given compile unit. + unsigned SrcId = FileIDCUMap[CUID] + 1; + + // We look up the CUID/file/dir by concatenating them with a zero byte. + SmallString<128> NamePair; + NamePair += CUID; + NamePair += '\0'; + NamePair += DirName; + NamePair += '\0'; // Zero bytes are not allowed in paths. 
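+  // For illustration (hypothetical values): compile unit 1 with directory
+  // "src" and file "a.c" yields the key <CUID> '\0' "src" '\0' "a.c", so the
+  // same file seen from another compile unit or directory gets its own id.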
+  NamePair += FileName;
+
+  StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
+  if (Ent.getValue() != SrcId)
+    return Ent.getValue();
+
+  FileIDCUMap[CUID] = SrcId;
+  // Print out a .file directive to specify files for .loc directives.
+  Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID);
+
+  return SrcId;
+}
+
+// Create a new CompileUnit for the given metadata node with tag
+// DW_TAG_compile_unit.
+CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
+  DICompileUnit DIUnit(N);
+  StringRef FN = DIUnit.getFilename();
+  CompilationDir = DIUnit.getDirectory();
+
+  DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+  CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+                                       DIUnit.getLanguage(), Die, Asm,
+                                       this, &InfoHolder);
+
+  FileIDCUMap[NewCU->getUniqueID()] = 0;
+  // Call this to emit a .file directive for the source file this CU comes
+  // from, if one hasn't been emitted yet.
+  getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID());
+
+  NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
+  NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+                 DIUnit.getLanguage());
+  NewCU->addString(Die, dwarf::DW_AT_name, FN);
+  // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
+  // into an entity. We're using 0 (or a NULL label) for this.
+  NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+
+  // Define the start line table label for each Compile Unit.
+  MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
+                                                   NewCU->getUniqueID());
+  Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
+                                                     NewCU->getUniqueID());
+
+  // DW_AT_stmt_list is an offset of the line number information for this
+  // compile unit in the debug_line section.
+  // The line table entries are not always emitted in assembly, so it
+  // is not okay to use line_table_start here.
+  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+    NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                    NewCU->getUniqueID() == 0 ?
+                    Asm->GetTempSymbol("section_line") : LineTableStartSym);
+  else if (NewCU->getUniqueID() == 0)
+    NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+  else
+    NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                    LineTableStartSym, DwarfLineSectionSym);
+
+  if (!CompilationDir.empty())
+    NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+  if (DIUnit.isOptimized())
+    NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
+
+  StringRef Flags = DIUnit.getFlags();
+  if (!Flags.empty())
+    NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
+
+  if (unsigned RVer = DIUnit.getRunTimeVersion())
+    NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+                   dwarf::DW_FORM_data1, RVer);
+
+  if (!FirstCU)
+    FirstCU = NewCU;
+
+  InfoHolder.addUnit(NewCU);
+
+  CUMap.insert(std::make_pair(N, NewCU));
+  return NewCU;
+}
+
+// Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
+                                        const MDNode *N) {
+  CompileUnit *&CURef = SPMap[N];
+  if (CURef)
+    return;
+  CURef = TheCU;
+
+  DISubprogram SP(N);
+  if (!SP.isDefinition())
+    // This is a method declaration which will be handled while constructing
+    // class type.
+    return;
+
+  DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP);
+
+  // Add to map.
+  TheCU->insertDIE(N, SubprogramDie);
+
+  // Add to context owner.
+  TheCU->addToContextOwner(SubprogramDie, SP.getContext());
+
+  // Expose as global, if requested.
+ if (GenerateDwarfPubNamesSection) + TheCU->addGlobalName(SP.getName(), SubprogramDie); +} + +// Emit all Dwarf sections that should come prior to the content. Create +// global DIEs and emit initial debug info sections. This is invoked by +// the target AsmPrinter. +void DwarfDebug::beginModule() { + if (DisableDebugInfoPrinting) + return; + + const Module *M = MMI->getModule(); + + // If module has named metadata anchors then use them, otherwise scan the + // module using debug info finder to collect debug info. + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (!CU_Nodes) + return; + + // Emit initial sections so we can reference labels later. + emitSectionLabels(); + + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit CUNode(CU_Nodes->getOperand(i)); + CompileUnit *CU = constructCompileUnit(CUNode); + DIArray GVs = CUNode.getGlobalVariables(); + for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) + CU->createGlobalVariableDIE(GVs.getElement(i)); + DIArray SPs = CUNode.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) + constructSubprogramDIE(CU, SPs.getElement(i)); + DIArray EnumTypes = CUNode.getEnumTypes(); + for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) + CU->getOrCreateTypeDIE(EnumTypes.getElement(i)); + DIArray RetainedTypes = CUNode.getRetainedTypes(); + for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) + CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); + // If we're splitting the dwarf out now that we've got the entire + // CU then construct a skeleton CU based upon it. + if (useSplitDwarf()) { + // This should be a unique identifier when we want to build .dwp files. + CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + // Now construct the skeleton CU associated. + constructSkeletonCU(CUNode); + } + } + + // Tell MMI that we have debug info. + MMI->setDebugInfoAvailability(true); + + // Prime section data. + SectionMap.insert(Asm->getObjFileLowering().getTextSection()); +} + +// Attach DW_AT_inline attribute with inlined subprogram DIEs. +void DwarfDebug::computeInlinedDIEs() { + // Attach DW_AT_inline attribute with inlined subprogram DIEs. + for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), + AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { + DIE *ISP = *AI; + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } + for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(), + AE = AbstractSPDies.end(); AI != AE; ++AI) { + DIE *ISP = AI->second; + if (InlinedSubprogramDIEs.count(ISP)) + continue; + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } +} + +// Collect info for variables that were optimized out. 
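+// For illustration: a subprogram whose body was entirely optimized away never
+// lands in ProcessedSPNodes, so the pass below still builds its subprogram
+// DIE and child variable DIEs from the metadata alone, letting a debugger at
+// least name variables that no longer exist in the generated code.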
+void DwarfDebug::collectDeadVariables() {
+  const Module *M = MMI->getModule();
+  DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap;
+
+  if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
+    for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+      DICompileUnit TheCU(CU_Nodes->getOperand(i));
+      DIArray Subprograms = TheCU.getSubprograms();
+      for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
+        DISubprogram SP(Subprograms.getElement(i));
+        if (ProcessedSPNodes.count(SP) != 0) continue;
+        if (!SP.Verify()) continue;
+        if (!SP.isDefinition()) continue;
+        DIArray Variables = SP.getVariables();
+        if (Variables.getNumElements() == 0) continue;
+
+        LexicalScope *Scope =
+          new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
+        DeadFnScopeMap[SP] = Scope;
+
+        // Construct subprogram DIE and add variables DIEs.
+        CompileUnit *SPCU = CUMap.lookup(TheCU);
+        assert(SPCU && "Unable to find Compile Unit!");
+        constructSubprogramDIE(SPCU, SP);
+        DIE *ScopeDIE = SPCU->getDIE(SP);
+        for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
+          DIVariable DV(Variables.getElement(vi));
+          if (!DV.Verify()) continue;
+          DbgVariable *NewVar = new DbgVariable(DV, NULL);
+          if (DIE *VariableDIE =
+              SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
+            ScopeDIE->addChild(VariableDIE);
+        }
+      }
+    }
+  }
+  DeleteContainerSeconds(DeadFnScopeMap);
+}
+
+void DwarfDebug::finalizeModuleInfo() {
+  // Collect info for variables that were optimized out.
+  collectDeadVariables();
+
+  // Attach DW_AT_inline attribute with inlined subprogram DIEs.
+  computeInlinedDIEs();
+
+  // Emit DW_AT_containing_type attribute to connect types with their
+  // vtable holding type.
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(),
+         CUE = CUMap.end(); CUI != CUE; ++CUI) {
+    CompileUnit *TheCU = CUI->second;
+    TheCU->constructContainingTypeDIEs();
+  }
+
+  // Compute DIE offsets and sizes.
+  InfoHolder.computeSizeAndOffsets();
+  if (useSplitDwarf())
+    SkeletonHolder.computeSizeAndOffsets();
+}
+
+void DwarfDebug::endSections() {
+  // Standard sections final addresses.
+  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
+  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
+
+  // End text sections.
+  for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) {
+    Asm->OutStreamer.SwitchSection(SectionMap[I]);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1));
+  }
+}
+
+// Emit all Dwarf sections that should come after the content.
+void DwarfDebug::endModule() {
+
+  if (!FirstCU) return;
+
+  // End any existing sections.
+  // TODO: Does this need to happen?
+  endSections();
+
+  // Finalize the debug info for the module.
+  finalizeModuleInfo();
+
+  if (!useSplitDwarf()) {
+    // Emit all the DIEs into a debug info section.
+    emitDebugInfo();
+
+    // Emit the corresponding abbreviations into an abbrev section.
+    emitAbbreviations();
+
+    // Emit info into a debug loc section.
+    emitDebugLoc();
+
+    // Emit info into a debug aranges section.
+    emitDebugARanges();
+
+    // Emit info into a debug ranges section.
+    emitDebugRanges();
+
+    // Emit info into a debug macinfo section.
+    emitDebugMacInfo();
+
+    // Emit inline info.
+    // TODO: When we don't need the option anymore we
+    // can remove all of the code that this section
+    // depends upon.
+    if (useDarwinGDBCompat())
+      emitDebugInlineInfo();
+  } else {
+    // TODO: Fill this in for separated debug sections and separate
+    // out information into new sections.
+
+    // Emit the debug info section and compile units.
+    emitDebugInfo();
+    emitDebugInfoDWO();
+
+    // Emit the corresponding abbreviations into an abbrev section.
+    emitAbbreviations();
+    emitDebugAbbrevDWO();
+
+    // Emit info into a debug loc section.
+    emitDebugLoc();
+
+    // Emit info into a debug aranges section.
+    emitDebugARanges();
+
+    // Emit info into a debug ranges section.
+    emitDebugRanges();
+
+    // Emit info into a debug macinfo section.
+    emitDebugMacInfo();
+
+    // Emit DWO addresses.
+    InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
+
+    // Emit inline info.
+    // TODO: When we don't need the option anymore we
+    // can remove all of the code that this section
+    // depends upon.
+    if (useDarwinGDBCompat())
+      emitDebugInlineInfo();
+  }
+
+  // Emit info into the dwarf accelerator table sections.
+  if (useDwarfAccelTables()) {
+    emitAccelNames();
+    emitAccelObjC();
+    emitAccelNamespaces();
+    emitAccelTypes();
+  }
+
+  // Emit info into a debug pubnames section, if requested.
+  if (GenerateDwarfPubNamesSection)
+    emitDebugPubnames();
+
+  // Emit info into a debug pubtypes section.
+  // TODO: When we don't need the option anymore we can
+  // remove all of the code that adds to the table.
+  if (useDarwinGDBCompat())
+    emitDebugPubTypes();
+
+  // Finally emit string information into a string table.
+  emitDebugStr();
+  if (useSplitDwarf())
+    emitDebugStrDWO();
+
+  // Clean up.
+  SPMap.clear();
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I)
+    delete I->second;
+
+  for (SmallVector<CompileUnit *, 1>::iterator I = SkeletonCUs.begin(),
+         E = SkeletonCUs.end(); I != E; ++I)
+    delete *I;
+
+  // Reset these for the next Module if we have one.
+  FirstCU = NULL;
+}
+
+// Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
+                                              DebugLoc ScopeLoc) {
+  LLVMContext &Ctx = DV->getContext();
+  // More than one inlined variable corresponds to one abstract variable.
+  DIVariable Var = cleanseInlinedVariable(DV, Ctx);
+  DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
+  if (AbsDbgVariable)
+    return AbsDbgVariable;
+
+  LexicalScope *Scope = LScopes.findAbstractScope(ScopeLoc.getScope(Ctx));
+  if (!Scope)
+    return NULL;
+
+  AbsDbgVariable = new DbgVariable(Var, NULL);
+  addScopeVariable(Scope, AbsDbgVariable);
+  AbstractVariables[Var] = AbsDbgVariable;
+  return AbsDbgVariable;
+}
+
+// If Var is a current function argument then add it to CurrentFnArguments list.
+bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
+                                      DbgVariable *Var, LexicalScope *Scope) {
+  if (!LScopes.isCurrentFunctionScope(Scope))
+    return false;
+  DIVariable DV = Var->getVariable();
+  if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+    return false;
+  unsigned ArgNo = DV.getArgNumber();
+  if (ArgNo == 0)
+    return false;
+
+  size_t Size = CurrentFnArguments.size();
+  if (Size == 0)
+    CurrentFnArguments.resize(MF->getFunction()->arg_size());
+  // The llvm::Function argument count is not a good indicator of how many
+  // arguments the function has at the source level.
+  if (ArgNo > Size)
+    CurrentFnArguments.resize(ArgNo * 2);
+  CurrentFnArguments[ArgNo - 1] = Var;
+  return true;
+}
+
+// Collect variable information from the side table maintained by MMI.
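+// For illustration: a variable that lives in a stack slot (e.g. an
+// address-taken local) is described here by a frame index paired with a
+// DebugLoc rather than by a chain of DBG_VALUE instructions.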
+void
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
+                                        SmallPtrSet<const MDNode *, 16> &Processed) {
+  MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+  for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+         VE = VMap.end(); VI != VE; ++VI) {
+    const MDNode *Var = VI->first;
+    if (!Var) continue;
+    Processed.insert(Var);
+    DIVariable DV(Var);
+    const std::pair<unsigned, DebugLoc> &VP = VI->second;
+
+    LexicalScope *Scope = LScopes.findLexicalScope(VP.second);
+
+    // If variable scope is not found then skip this variable.
+    if (Scope == 0)
+      continue;
+
+    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+    DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable);
+    RegVar->setFrameIndex(VP.first);
+    if (!addCurrentFnArgument(MF, RegVar, Scope))
+      addScopeVariable(Scope, RegVar);
+    if (AbsDbgVariable)
+      AbsDbgVariable->setFrameIndex(VP.first);
+  }
+}
+
+// Return true if the debug value, encoded by a DBG_VALUE instruction, is in a
+// defined reg.
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+  assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+  return MI->getNumOperands() == 3 &&
+         MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
+         MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
+}
+
+// Get .debug_loc entry for the instruction range starting at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+                                         const MCSymbol *FLabel,
+                                         const MCSymbol *SLabel,
+                                         const MachineInstr *MI) {
+  const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+  if (MI->getNumOperands() != 3) {
+    MachineLocation MLoc = Asm->getDebugValueLocation(MI);
+    return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+  }
+  if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
+    MachineLocation MLoc;
+    MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+    return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+  }
+  if (MI->getOperand(0).isImm())
+    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+  if (MI->getOperand(0).isFPImm())
+    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+  if (MI->getOperand(0).isCImm())
+    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+
+  llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
+}
+
+// Find variables for each lexical scope.
+void
+DwarfDebug::collectVariableInfo(const MachineFunction *MF,
+                                SmallPtrSet<const MDNode *, 16> &Processed) {
+
+  // Collect info from the MMI side table.
+  collectVariableInfoFromMMITable(MF, Processed);
+
+  for (SmallVectorImpl<const MDNode*>::const_iterator
+         UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE;
+       ++UVI) {
+    const MDNode *Var = *UVI;
+    if (Processed.count(Var))
+      continue;
+
+    // History contains relevant DBG_VALUE instructions for Var and
+    // instructions clobbering it.
+    SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+    if (History.empty())
+      continue;
+    const MachineInstr *MInsn = History.front();
+
+    DIVariable DV(Var);
+    LexicalScope *Scope = NULL;
+    if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
+        DISubprogram(DV.getContext()).describes(MF->getFunction()))
+      Scope = LScopes.getCurrentFunctionScope();
+    else if (MDNode *IA = DV.getInlinedAt())
+      Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
+    else
+      Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
+    // If variable scope is not found then skip this variable.
+ if (!Scope) + continue; + + Processed.insert(DV); + assert(MInsn->isDebugValue() && "History must begin with debug value"); + DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc()); + DbgVariable *RegVar = new DbgVariable(DV, AbsVar); + if (!addCurrentFnArgument(MF, RegVar, Scope)) + addScopeVariable(Scope, RegVar); + if (AbsVar) + AbsVar->setMInsn(MInsn); + + // Simplify ranges that are fully coalesced. + if (History.size() <= 1 || (History.size() == 2 && + MInsn->isIdenticalTo(History.back()))) { + RegVar->setMInsn(MInsn); + continue; + } + + // Handle multiple DBG_VALUE instructions describing one variable. + RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); + + for (SmallVectorImpl<const MachineInstr*>::const_iterator + HI = History.begin(), HE = History.end(); HI != HE; ++HI) { + const MachineInstr *Begin = *HI; + assert(Begin->isDebugValue() && "Invalid History entry"); + + // Check if DBG_VALUE is truncating a range. + if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() + && !Begin->getOperand(0).getReg()) + continue; + + // Compute the range for a register location. + const MCSymbol *FLabel = getLabelBeforeInsn(Begin); + const MCSymbol *SLabel = 0; + + if (HI + 1 == HE) + // If Begin is the last instruction in History then its value is valid + // until the end of the function. + SLabel = FunctionEndSym; + else { + const MachineInstr *End = HI[1]; + DEBUG(dbgs() << "DotDebugLoc Pair:\n" + << "\t" << *Begin << "\t" << *End << "\n"); + if (End->isDebugValue()) + SLabel = getLabelBeforeInsn(End); + else { + // End is a normal instruction clobbering the range. + SLabel = getLabelAfterInsn(End); + assert(SLabel && "Forgot label after clobber instruction"); + ++HI; + } + } + + // The value is valid until the next DBG_VALUE or clobber. + DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, + Begin)); + } + DotDebugLocEntries.push_back(DotDebugLocEntry()); + } + + // Collect info for variables that were optimized out. + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables(); + for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { + DIVariable DV(Variables.getElement(i)); + if (!DV || !DV.Verify() || !Processed.insert(DV)) + continue; + if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) + addScopeVariable(Scope, new DbgVariable(DV, NULL)); + } +} + +// Return Label preceding the instruction. +MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) { + MCSymbol *Label = LabelsBeforeInsn.lookup(MI); + assert(Label && "Didn't insert label before instruction"); + return Label; +} + +// Return Label immediately following the instruction. +MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { + return LabelsAfterInsn.lookup(MI); +} + +// Process beginning of an instruction. +void DwarfDebug::beginInstruction(const MachineInstr *MI) { + // Check if source location changes, but ignore DBG_VALUE locations. 
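+  // For illustration: a run of instructions sharing one DebugLoc produces a
+  // single line-table row; the row whose location matches PrologEndLoc is
+  // additionally flagged DWARF2_FLAG_PROLOGUE_END below.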
+  if (!MI->isDebugValue()) {
+    DebugLoc DL = MI->getDebugLoc();
+    if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
+      unsigned Flags = 0;
+      PrevInstLoc = DL;
+      if (DL == PrologEndLoc) {
+        Flags |= DWARF2_FLAG_PROLOGUE_END;
+        PrologEndLoc = DebugLoc();
+      }
+      if (PrologEndLoc.isUnknown())
+        Flags |= DWARF2_FLAG_IS_STMT;
+
+      if (!DL.isUnknown()) {
+        const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+        recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+      } else
+        recordSourceLine(0, 0, 0, 0);
+    }
+  }
+
+  // Insert labels where requested.
+  DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+    LabelsBeforeInsn.find(MI);
+
+  // No label needed.
+  if (I == LabelsBeforeInsn.end())
+    return;
+
+  // Label already assigned.
+  if (I->second)
+    return;
+
+  if (!PrevLabel) {
+    PrevLabel = MMI->getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(PrevLabel);
+  }
+  I->second = PrevLabel;
+}
+
+// Process end of an instruction.
+void DwarfDebug::endInstruction(const MachineInstr *MI) {
+  // Don't create a new label after DBG_VALUE instructions.
+  // They don't generate code.
+  if (!MI->isDebugValue())
+    PrevLabel = 0;
+
+  DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
+    LabelsAfterInsn.find(MI);
+
+  // No label needed.
+  if (I == LabelsAfterInsn.end())
+    return;
+
+  // Label already assigned.
+  if (I->second)
+    return;
+
+  // We need a label after this instruction.
+  if (!PrevLabel) {
+    PrevLabel = MMI->getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(PrevLabel);
+  }
+  I->second = PrevLabel;
+}
+
+// Each LexicalScope has a first and a last instruction that mark the
+// beginning and end of the scope respectively. Create an inverse map that
+// lists which scopes start (and end) at which instruction. One instruction
+// may start (or end) multiple scopes. Ignore scopes that are not reachable.
+void DwarfDebug::identifyScopeMarkers() {
+  SmallVector<LexicalScope *, 4> WorkList;
+  WorkList.push_back(LScopes.getCurrentFunctionScope());
+  while (!WorkList.empty()) {
+    LexicalScope *S = WorkList.pop_back_val();
+
+    const SmallVector<LexicalScope *, 4> &Children = S->getChildren();
+    if (!Children.empty())
+      for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(),
+             SE = Children.end(); SI != SE; ++SI)
+        WorkList.push_back(*SI);
+
+    if (S->isAbstractScope())
+      continue;
+
+    const SmallVector<InsnRange, 4> &Ranges = S->getRanges();
+    if (Ranges.empty())
+      continue;
+    for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
+           RE = Ranges.end(); RI != RE; ++RI) {
+      assert(RI->first && "InsnRange does not have first instruction!");
+      assert(RI->second && "InsnRange does not have second instruction!");
+      requestLabelBeforeInsn(RI->first);
+      requestLabelAfterInsn(RI->second);
+    }
+  }
+}
+
+// Get MDNode for DebugLoc's scope.
+static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
+  if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
+    return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
+  return DL.getScope(Ctx);
+}
+
+// Walk up the scope chain of the given debug loc and find line number info
+// for the function.
+static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
+  const MDNode *Scope = getScopeNode(DL, Ctx);
+  DISubprogram SP = getDISubprogram(Scope);
+  if (SP.Verify()) {
+    // Check the number of operands; this compatibility test is cheap here.
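+    // (Illustrative note, based on the check below.) Newer DISubprogram
+    // nodes carry a separate scope-line operand; older, shorter nodes do
+    // not, so fall back to the declaration line for them.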
+ if (SP->getNumOperands() > 19) + return DebugLoc::get(SP.getScopeLineNumber(), 0, SP); + else + return DebugLoc::get(SP.getLineNumber(), 0, SP); + } + + return DebugLoc(); +} + +// Gather pre-function debug information. Assumes being called immediately +// after the function entry point has been emitted. +void DwarfDebug::beginFunction(const MachineFunction *MF) { + if (!MMI->hasDebugInfo()) return; + LScopes.initialize(*MF); + if (LScopes.empty()) return; + identifyScopeMarkers(); + + // Set DwarfCompileUnitID in MCContext to the Compile Unit this function + // belongs to. + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); + assert(TheCU && "Unable to find compile unit!"); + Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); + + FunctionBeginSym = Asm->GetTempSymbol("func_begin", + Asm->getFunctionNumber()); + // Assumes in correct section after the entry point. + Asm->OutStreamer.EmitLabel(FunctionBeginSym); + + assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); + + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + // LiveUserVar - Map physreg numbers to the MDNode they contain. + std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs()); + + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + bool AtBlockEntry = true; + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MI = II; + + if (MI->isDebugValue()) { + assert(MI->getNumOperands() > 1 && "Invalid machine instruction!"); + + // Keep track of user variables. + const MDNode *Var = + MI->getOperand(MI->getNumOperands() - 1).getMetadata(); + + // Variable is in a register, we need to check for clobbers. + if (isDbgValueInDefinedReg(MI)) + LiveUserVar[MI->getOperand(0).getReg()] = Var; + + // Check the history of this variable. + SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var]; + if (History.empty()) { + UserVariables.push_back(Var); + // The first mention of a function argument gets the FunctionBeginSym + // label, so arguments are visible when breaking at function entry. + DIVariable DV(Var); + if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable && + DISubprogram(getDISubprogram(DV.getContext())) + .describes(MF->getFunction())) + LabelsBeforeInsn[MI] = FunctionBeginSym; + } else { + // We have seen this variable before. Try to coalesce DBG_VALUEs. + const MachineInstr *Prev = History.back(); + if (Prev->isDebugValue()) { + // Coalesce identical entries at the end of History. + if (History.size() >= 2 && + Prev->isIdenticalTo(History[History.size() - 2])) { + DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" + << "\t" << *Prev + << "\t" << *History[History.size() - 2] << "\n"); + History.pop_back(); + } + + // Terminate old register assignments that don't reach MI; + MachineFunction::const_iterator PrevMBB = Prev->getParent(); + if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) && + isDbgValueInDefinedReg(Prev)) { + // Previous register assignment needs to terminate at the end of + // its basic block. + MachineBasicBlock::const_iterator LastMI = + PrevMBB->getLastNonDebugInstr(); + if (LastMI == PrevMBB->end()) { + // Drop DBG_VALUE for empty range. + DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" + << "\t" << *Prev << "\n"); + History.pop_back(); + } + else { + // Terminate after LastMI. 
+            History.push_back(LastMI);
+          }
+        }
+      }
+    }
+    History.push_back(MI);
+  } else {
+    // Not a DBG_VALUE instruction.
+    if (!MI->isLabel())
+      AtBlockEntry = false;
+
+    // First known non-DBG_VALUE and non-frame setup location marks
+    // the beginning of the function body.
+    if (!MI->getFlag(MachineInstr::FrameSetup) &&
+        (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()))
+      PrologEndLoc = MI->getDebugLoc();
+
+    // Check if the instruction clobbers any registers with debug vars.
+    for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+           MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+      if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
+        continue;
+      for (MCRegAliasIterator AI(MOI->getReg(), TRI, true);
+           AI.isValid(); ++AI) {
+        unsigned Reg = *AI;
+        const MDNode *Var = LiveUserVar[Reg];
+        if (!Var)
+          continue;
+        // Reg is now clobbered.
+        LiveUserVar[Reg] = 0;
+
+        // Was Var last defined by a DBG_VALUE referring to Reg?
+        DbgValueHistoryMap::iterator HistI = DbgValues.find(Var);
+        if (HistI == DbgValues.end())
+          continue;
+        SmallVectorImpl<const MachineInstr*> &History = HistI->second;
+        if (History.empty())
+          continue;
+        const MachineInstr *Prev = History.back();
+        // Sanity-check: Register assignments are terminated at the end of
+        // their block.
+        if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent())
+          continue;
+        // Is the variable still in Reg?
+        if (!isDbgValueInDefinedReg(Prev) ||
+            Prev->getOperand(0).getReg() != Reg)
+          continue;
+        // Var is clobbered. Make sure the next instruction gets a label.
+        History.push_back(MI);
+      }
+    }
+  }
+}
+}
+
+  for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end();
+       I != E; ++I) {
+    SmallVectorImpl<const MachineInstr*> &History = I->second;
+    if (History.empty())
+      continue;
+
+    // Make sure the final register assignments are terminated.
+    const MachineInstr *Prev = History.back();
+    if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
+      const MachineBasicBlock *PrevMBB = Prev->getParent();
+      MachineBasicBlock::const_iterator LastMI =
+        PrevMBB->getLastNonDebugInstr();
+      if (LastMI == PrevMBB->end())
+        // Drop DBG_VALUE for empty range.
+        History.pop_back();
+      else {
+        // Terminate after LastMI.
+        History.push_back(LastMI);
+      }
+    }
+    // Request labels for the full history.
+    for (unsigned i = 0, e = History.size(); i != e; ++i) {
+      const MachineInstr *MI = History[i];
+      if (MI->isDebugValue())
+        requestLabelBeforeInsn(MI);
+      else
+        requestLabelAfterInsn(MI);
+    }
+  }
+
+  PrevInstLoc = DebugLoc();
+  PrevLabel = FunctionBeginSym;
+
+  // Record beginning of function.
+  if (!PrologEndLoc.isUnknown()) {
+    DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc,
+                                       MF->getFunction()->getContext());
+    recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
+                     FnStartDL.getScope(MF->getFunction()->getContext()),
+                     // We'd like to list the prologue as "not statements" but
+                     // GDB behaves poorly if we do that. Revisit this with
+                     // caution/GDB (7.5+) testing.
+                     DWARF2_FLAG_IS_STMT);
+  }
+}
+
+void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+  ScopeVariables[LS].push_back(Var);
+}
+
+// Gather and emit post-function debug information.
+void DwarfDebug::endFunction(const MachineFunction *MF) {
+  if (!MMI->hasDebugInfo() || LScopes.empty()) return;
+
+  // Define end label for subprogram.
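+  // For illustration: the N-th machine function gets the label pair
+  // "func_beginN"/"func_endN"; updateSubprogramScopeDIE turns these into the
+  // subprogram's DW_AT_low_pc/DW_AT_high_pc values.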
+ FunctionEndSym = Asm->GetTempSymbol("func_end", + Asm->getFunctionNumber()); + // Assumes in correct section after the entry point. + Asm->OutStreamer.EmitLabel(FunctionEndSym); + // Set DwarfCompileUnitID in MCContext to default value. + Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); + + SmallPtrSet<const MDNode *, 16> ProcessedVars; + collectVariableInfo(MF, ProcessedVars); + + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); + assert(TheCU && "Unable to find compile unit!"); + + // Construct abstract scopes. + ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList(); + for (unsigned i = 0, e = AList.size(); i != e; ++i) { + LexicalScope *AScope = AList[i]; + DISubprogram SP(AScope->getScopeNode()); + if (SP.Verify()) { + // Collect info for variables that were optimized out. + DIArray Variables = SP.getVariables(); + for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { + DIVariable DV(Variables.getElement(i)); + if (!DV || !DV.Verify() || !ProcessedVars.insert(DV)) + continue; + // Check that DbgVariable for DV wasn't created earlier, when + // findAbstractVariable() was called for inlined instance of DV. + LLVMContext &Ctx = DV->getContext(); + DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx); + if (AbstractVariables.lookup(CleanDV)) + continue; + if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext())) + addScopeVariable(Scope, new DbgVariable(DV, NULL)); + } + } + if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0) + constructScopeDIE(TheCU, AScope); + } + + DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); + + if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) + TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); + + DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), + MMI->getFrameMoves())); + + // Clear debug info + for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator + I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I) + DeleteContainerPointers(I->second); + ScopeVariables.clear(); + DeleteContainerPointers(CurrentFnArguments); + UserVariables.clear(); + DbgValues.clear(); + AbstractVariables.clear(); + LabelsBeforeInsn.clear(); + LabelsAfterInsn.clear(); + PrevLabel = NULL; +} + +// Register a source line with debug info. Returns the unique label that was +// emitted and which provides correspondence to the source line list. 
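+//
+// For illustration (hypothetical values): recordSourceLine(42, 8, S, 0)
+// resolves S to its file and directory, registers the file through
+// getOrCreateSourceID, and then emits the equivalent of ".loc <file-id> 42 8"
+// through the streamer.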
+void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, + unsigned Flags) { + StringRef Fn; + StringRef Dir; + unsigned Src = 1; + if (S) { + DIDescriptor Scope(S); + + if (Scope.isCompileUnit()) { + DICompileUnit CU(S); + Fn = CU.getFilename(); + Dir = CU.getDirectory(); + } else if (Scope.isFile()) { + DIFile F(S); + Fn = F.getFilename(); + Dir = F.getDirectory(); + } else if (Scope.isSubprogram()) { + DISubprogram SP(S); + Fn = SP.getFilename(); + Dir = SP.getDirectory(); + } else if (Scope.isLexicalBlockFile()) { + DILexicalBlockFile DBF(S); + Fn = DBF.getFilename(); + Dir = DBF.getDirectory(); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(S); + Fn = DB.getFilename(); + Dir = DB.getDirectory(); + } else + llvm_unreachable("Unexpected scope info"); + + Src = getOrCreateSourceID(Fn, Dir, + Asm->OutStreamer.getContext().getDwarfCompileUnitID()); + } + Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn); +} + +//===----------------------------------------------------------------------===// +// Emit Methods +//===----------------------------------------------------------------------===// + +// Compute the size and offset of a DIE. +unsigned +DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { + // Get the children. + const std::vector<DIE *> &Children = Die->getChildren(); + + // Record the abbreviation. + assignAbbrevNumber(Die->getAbbrev()); + + // Get the abbreviation for this DIE. + unsigned AbbrevNumber = Die->getAbbrevNumber(); + const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1); + + // Set DIE offset + Die->setOffset(Offset); + + // Start the size with the size of abbreviation code. + Offset += MCAsmInfo::getULEB128Size(AbbrevNumber); + + const SmallVectorImpl<DIEValue*> &Values = Die->getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData(); + + // Size the DIE attribute values. + for (unsigned i = 0, N = Values.size(); i < N; ++i) + // Size attribute value. + Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm()); + + // Size the DIE children if any. + if (!Children.empty()) { + assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes && + "Children flag not set"); + + for (unsigned j = 0, M = Children.size(); j < M; ++j) + Offset = computeSizeAndOffset(Children[j], Offset); + + // End of children marker. + Offset += sizeof(int8_t); + } + + Die->setSize(Offset - Die->getOffset()); + return Offset; +} + +// Compute the size and offset of all the DIEs. +void DwarfUnits::computeSizeAndOffsets() { + // Offset from the beginning of debug info section. + unsigned AccuOffset = 0; + for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), + E = CUs.end(); I != E; ++I) { + (*I)->setDebugInfoOffset(AccuOffset); + unsigned Offset = + sizeof(int32_t) + // Length of Compilation Unit Info + sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t); // Pointer Size (in bytes) + + unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); + AccuOffset += EndOffset; + } +} + +// Emit initial Dwarf sections with a label at the start of each one. +void DwarfDebug::emitSectionLabels() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + // Dwarf sections base addresses. 
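+  // For illustration: "section_info" becomes an assembler temporary at the
+  // start of .debug_info, against which CU offsets are later measured; the
+  // calls below without a SymbolStem only switch sections and define no
+  // symbol.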
+ DwarfInfoSectionSym = + emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); + DwarfAbbrevSectionSym = + emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); + if (useSplitDwarf()) + DwarfAbbrevDWOSectionSym = + emitSectionSym(Asm, TLOF.getDwarfAbbrevDWOSection(), + "section_abbrev_dwo"); + emitSectionSym(Asm, TLOF.getDwarfARangesSection()); + + if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection()) + emitSectionSym(Asm, MacroInfo); + + DwarfLineSectionSym = + emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); + emitSectionSym(Asm, TLOF.getDwarfLocSection()); + if (GenerateDwarfPubNamesSection) + emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); + emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); + DwarfStrSectionSym = + emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); + if (useSplitDwarf()) + DwarfStrDWOSectionSym = + emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); + DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(), + "debug_range"); + + DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(), + "section_debug_loc"); + + TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin"); + emitSectionSym(Asm, TLOF.getDataSection()); +} + +// Recursively emits a debug information entry. +void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { + // Get the abbreviation for this DIE. + unsigned AbbrevNumber = Die->getAbbrevNumber(); + const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1); + + // Emit the code (index) for the abbreviation. + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" + + Twine::utohexstr(Die->getOffset()) + ":0x" + + Twine::utohexstr(Die->getSize()) + " " + + dwarf::TagString(Abbrev->getTag())); + Asm->EmitULEB128(AbbrevNumber); + + const SmallVectorImpl<DIEValue*> &Values = Die->getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData(); + + // Emit the DIE attribute values. + for (unsigned i = 0, N = Values.size(); i < N; ++i) { + unsigned Attr = AbbrevData[i].getAttribute(); + unsigned Form = AbbrevData[i].getForm(); + assert(Form && "Too many attributes for DIE (check abbreviation)"); + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); + + switch (Attr) { + case dwarf::DW_AT_abstract_origin: { + DIEEntry *E = cast<DIEEntry>(Values[i]); + DIE *Origin = E->getEntry(); + unsigned Addr = Origin->getOffset(); + if (Form == dwarf::DW_FORM_ref_addr) { + // For DW_FORM_ref_addr, output the offset from beginning of debug info + // section. Origin->getOffset() returns the offset from start of the + // compile unit. + DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Addr += Holder.getCUOffset(Origin->getCompileUnit()); + } + Asm->EmitInt32(Addr); + break; + } + case dwarf::DW_AT_ranges: { + // DW_AT_range Value encodes offset in debug_range section. 
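+      // For illustration: with cross-section relocations available, the
+      // attribute is emitted as debug_range + offset; otherwise it is
+      // emitted as a label difference that needs no relocation.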
+ DIEInteger *V = cast<DIEInteger>(Values[i]); + + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) { + Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym, + V->getValue(), + 4); + } else { + Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym, + V->getValue(), + DwarfDebugRangeSectionSym, + 4); + } + break; + } + case dwarf::DW_AT_location: { + if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + Asm->EmitLabelReference(L->getValue(), 4); + else + Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); + } else { + Values[i]->EmitValue(Asm, Form); + } + break; + } + case dwarf::DW_AT_accessibility: { + if (Asm->isVerbose()) { + DIEInteger *V = cast<DIEInteger>(Values[i]); + Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue())); + } + Values[i]->EmitValue(Asm, Form); + break; + } + default: + // Emit an attribute using the defined form. + Values[i]->EmitValue(Asm, Form); + break; + } + } + + // Emit the DIE children if any. + if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) { + const std::vector<DIE *> &Children = Die->getChildren(); + + for (unsigned j = 0, M = Children.size(); j < M; ++j) + emitDIE(Children[j], Abbrevs); + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("End Of Children Mark"); + Asm->EmitInt8(0); + } +} + +// Emit the various dwarf units to the unit section USection with +// the abbreviations going into ASection. +void DwarfUnits::emitUnits(DwarfDebug *DD, + const MCSection *USection, + const MCSection *ASection, + const MCSymbol *ASectionSym) { + Asm->OutStreamer.SwitchSection(USection); + for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), + E = CUs.end(); I != E; ++I) { + CompileUnit *TheCU = *I; + DIE *Die = TheCU->getCUDie(); + + // Emit the compile units header. + Asm->OutStreamer + .EmitLabel(Asm->GetTempSymbol(USection->getLabelBeginName(), + TheCU->getUniqueID())); + + // Emit size of content not including length itself + unsigned ContentSize = Die->getSize() + + sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t); // Pointer Size (in bytes) + + Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); + Asm->EmitInt32(ContentSize); + Asm->OutStreamer.AddComment("DWARF version number"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); + Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), + ASectionSym); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); + + DD->emitDIE(Die, Abbreviations); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(), + TheCU->getUniqueID())); + } +} + +/// For a given compile unit DIE, returns offset from beginning of debug info. +unsigned DwarfUnits::getCUOffset(DIE *Die) { + assert(Die->getTag() == dwarf::DW_TAG_compile_unit && + "Input DIE should be compile unit in getCUOffset."); + for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), + E = CUs.end(); I != E; ++I) { + CompileUnit *TheCU = *I; + if (TheCU->getCUDie() == Die) + return TheCU->getDebugInfoOffset(); + } + llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits."); +} + +// Emit the debug info section. +void DwarfDebug::emitDebugInfo() { + DwarfUnits &Holder = useSplitDwarf() ? 
SkeletonHolder : InfoHolder;
+
+  Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoSection(),
+                   Asm->getObjFileLowering().getDwarfAbbrevSection(),
+                   DwarfAbbrevSectionSym);
+}
+
+// Emit the abbreviation section.
+void DwarfDebug::emitAbbreviations() {
+  if (!useSplitDwarf())
+    emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection(),
+                &Abbreviations);
+  else
+    emitSkeletonAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
+}
+
+void DwarfDebug::emitAbbrevs(const MCSection *Section,
+                             std::vector<DIEAbbrev *> *Abbrevs) {
+  // Check to see if it is worth the effort.
+  if (!Abbrevs->empty()) {
+    // Start the debug abbrev section.
+    Asm->OutStreamer.SwitchSection(Section);
+
+    MCSymbol *Begin = Asm->GetTempSymbol(Section->getLabelBeginName());
+    Asm->OutStreamer.EmitLabel(Begin);
+
+    // For each abbreviation.
+    for (unsigned i = 0, N = Abbrevs->size(); i < N; ++i) {
+      // Get abbreviation data.
+      const DIEAbbrev *Abbrev = Abbrevs->at(i);
+
+      // Emit the abbreviation's code (base 1 index).
+      Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+      // Emit the abbreviation's data.
+      Abbrev->Emit(Asm);
+    }
+
+    // Mark end of abbreviations.
+    Asm->EmitULEB128(0, "EOM(3)");
+
+    MCSymbol *End = Asm->GetTempSymbol(Section->getLabelEndName());
+    Asm->OutStreamer.EmitLabel(End);
+  }
+}
+
+// Emit the last address of the section and the end of the line matrix.
+void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
+  // Define last address of section.
+  Asm->OutStreamer.AddComment("Extended Op");
+  Asm->EmitInt8(0);
+
+  Asm->OutStreamer.AddComment("Op size");
+  Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
+  Asm->OutStreamer.AddComment("DW_LNE_set_address");
+  Asm->EmitInt8(dwarf::DW_LNE_set_address);
+
+  Asm->OutStreamer.AddComment("Section end label");
+
+  Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end", SectionEnd),
+                                   Asm->getDataLayout().getPointerSize());
+
+  // Mark end of matrix.
+  Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
+  Asm->EmitInt8(0);
+  Asm->EmitInt8(1);
+  Asm->EmitInt8(1);
+}
+
+// Emit visible names into a hashed accelerator table section.
+void DwarfDebug::emitAccelNames() {
+  DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+                                           dwarf::DW_FORM_data4));
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I) {
+    CompileUnit *TheCU = I->second;
+    const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames();
+    for (StringMap<std::vector<DIE*> >::const_iterator
+           GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+      const char *Name = GI->getKeyData();
+      const std::vector<DIE *> &Entities = GI->second;
+      for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+             DE = Entities.end(); DI != DE; ++DI)
+        AT.AddName(Name, (*DI));
+    }
+  }
+
+  AT.FinalizeTable(Asm, "Names");
+  Asm->OutStreamer.SwitchSection(
+    Asm->getObjFileLowering().getDwarfAccelNamesSection());
+  MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
+  Asm->OutStreamer.EmitLabel(SectionBegin);
+
+  // Emit the full data.
+  AT.Emit(Asm, SectionBegin, &InfoHolder);
+}
+
+// Emit Objective-C classes and categories into a hashed accelerator table
+// section.
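+// Each accelerator table pairs a name's hash with the atoms declared when the
+// table was built; here that is a single eAtomTypeDIEOffset atom, so a lookup
+// of a name yields the 4-byte DIE offsets of the matching entities.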
+void DwarfDebug::emitAccelObjC() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC(); + for (StringMap<std::vector<DIE*> >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector<DIE *> &Entities = GI->second; + for (std::vector<DIE *>::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } + + AT.FinalizeTable(Asm, "ObjC"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelObjCSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. + AT.Emit(Asm, SectionBegin, &InfoHolder); +} + +// Emit namespace dies into a hashed accelerator table. +void DwarfDebug::emitAccelNamespaces() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace(); + for (StringMap<std::vector<DIE*> >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector<DIE *> &Entities = GI->second; + for (std::vector<DIE *>::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } + + AT.FinalizeTable(Asm, "namespac"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelNamespaceSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. + AT.Emit(Asm, SectionBegin, &InfoHolder); +} + +// Emit type dies into a hashed accelerator table. +void DwarfDebug::emitAccelTypes() { + std::vector<DwarfAccelTable::Atom> Atoms; + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag, + dwarf::DW_FORM_data2)); + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags, + dwarf::DW_FORM_data1)); + DwarfAccelTable AT(Atoms); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names + = TheCU->getAccelTypes(); + for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second; + for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI + = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI) + AT.AddName(Name, (*DI).first, (*DI).second); + } + } + + AT.FinalizeTable(Asm, "types"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelTypesSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. 
+ AT.Emit(Asm, SectionBegin, &InfoHolder); +} + +/// emitDebugPubnames - Emit visible names into a debug pubnames section. +/// +void DwarfDebug::emitDebugPubnames() { + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + + typedef DenseMap<const MDNode*, CompileUnit*> CUMapType; + for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + unsigned ID = TheCU->getUniqueID(); + + if (TheCU->getGlobalNames().empty()) + continue; + + // Start the dwarf pubnames section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubNamesSection()); + + Asm->OutStreamer.AddComment("Length of Public Names Info"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID), + Asm->GetTempSymbol("pubnames_begin", ID), 4); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID)); + + Asm->OutStreamer.AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + + Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); + Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), + DwarfInfoSectionSym); + + Asm->OutStreamer.AddComment("Compilation Unit Length"); + Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID), + Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), + 4); + + const StringMap<DIE*> &Globals = TheCU->getGlobalNames(); + for (StringMap<DIE*>::const_iterator + GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const DIE *Entity = GI->second; + + Asm->OutStreamer.AddComment("DIE offset"); + Asm->EmitInt32(Entity->getOffset()); + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("External Name"); + Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + } + + Asm->OutStreamer.AddComment("End Mark"); + Asm->EmitInt32(0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID)); + } +} + +void DwarfDebug::emitDebugPubTypes() { + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + // Start the dwarf pubtypes section. 
+    Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfPubTypesSection());
+    Asm->OutStreamer.AddComment("Length of Public Types Info");
+    Asm->EmitLabelDifference(
+      Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()),
+      Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4);
+
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+                                                  TheCU->getUniqueID()));
+
+    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+    Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+    Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+    const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+    Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(),
+                                              TheCU->getUniqueID()),
+                           DwarfInfoSectionSym);
+
+    Asm->OutStreamer.AddComment("Compilation Unit Length");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(),
+                                                TheCU->getUniqueID()),
+                             Asm->GetTempSymbol(ISec->getLabelBeginName(),
+                                                TheCU->getUniqueID()),
+                             4);
+
+    const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+    for (StringMap<DIE*>::const_iterator
+           GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+      const char *Name = GI->getKeyData();
+      DIE *Entity = GI->second;
+
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(Entity->getOffset());
+
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+      // Emit the name with a terminating null byte.
+      Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1));
+    }
+
+    Asm->OutStreamer.AddComment("End Mark");
+    Asm->EmitInt32(0);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+                                                  TheCU->getUniqueID()));
+  }
+}
+
+// Emit strings into a string section.
+void DwarfUnits::emitStrings(const MCSection *StrSection,
+                             const MCSection *OffsetSection = NULL,
+                             const MCSymbol *StrSecSym = NULL) {
+
+  if (StringPool.empty()) return;
+
+  // Start the dwarf str section.
+  Asm->OutStreamer.SwitchSection(StrSection);
+
+  // Get all of the string pool entries and put them in an array by their ID so
+  // we can sort them.
+  SmallVector<std::pair<unsigned,
+              StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+
+  for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
+         I = StringPool.begin(), E = StringPool.end();
+       I != E; ++I)
+    Entries.push_back(std::make_pair(I->second.second, &*I));
+
+  array_pod_sort(Entries.begin(), Entries.end());
+
+  for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+    // Emit a label for reference from debug information entries.
+    Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
+
+    // Emit the string itself with a terminating null byte.
+    Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
+                                         Entries[i].second->getKeyLength()+1));
+  }
+
+  // If we've got an offset section go ahead and emit that now as well.
+  if (OffsetSection) {
+    Asm->OutStreamer.SwitchSection(OffsetSection);
+    unsigned offset = 0;
+    unsigned size = 4; // FIXME: DWARF64 is 8.
+    for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+      Asm->OutStreamer.EmitIntValue(offset, size);
+      offset += Entries[i].second->getKeyLength() + 1;
+    }
+  }
+}
+
+// Emit addresses into an address section.
+void DwarfUnits::emitAddresses(const MCSection *AddrSection) {
+
+  if (AddressPool.empty()) return;
+
+  // Start the dwarf addr section.
+  Asm->OutStreamer.SwitchSection(AddrSection);
+
+  // Get all of the address pool entries and put them in an array by their ID
+  // so we can sort them.
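+  // Each entry was handed a sequential index when it entered the pool, so
+  // after sorting, entry i lands at offset i * pointer-size in the address
+  // section; that index is what DW_FORM_GNU_addr_index forms refer back to.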
+  SmallVector<std::pair<unsigned,
+              std::pair<MCSymbol*, unsigned>* >, 64> Entries;
+
+  for (DenseMap<MCSymbol*, std::pair<MCSymbol*, unsigned> >::iterator
+         I = AddressPool.begin(), E = AddressPool.end();
+       I != E; ++I)
+    Entries.push_back(std::make_pair(I->second.second, &(I->second)));
+
+  array_pod_sort(Entries.begin(), Entries.end());
+
+  for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+    // Emit the address value, for reference from debug information entries.
+    MCSymbol *Sym = Entries[i].second->first;
+    if (Sym)
+      Asm->EmitLabelReference(Entries[i].second->first,
+                              Asm->getDataLayout().getPointerSize());
+    else
+      Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize());
+  }
+}
+
+// Emit visible names into a debug str section.
+void DwarfDebug::emitDebugStr() {
+  DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+  Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
+}
+
+// Emit location lists into a debug loc section.
+void DwarfDebug::emitDebugLoc() {
+  if (DotDebugLocEntries.empty())
+    return;
+
+  for (SmallVectorImpl<DotDebugLocEntry>::iterator
+         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+       I != E; ++I) {
+    DotDebugLocEntry &Entry = *I;
+    if (I + 1 != DotDebugLocEntries.end())
+      Entry.Merge(I+1);
+  }
+
+  // Start the dwarf loc section.
+  Asm->OutStreamer.SwitchSection(
+    Asm->getObjFileLowering().getDwarfLocSection());
+  unsigned char Size = Asm->getDataLayout().getPointerSize();
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
+  unsigned index = 1;
+  for (SmallVectorImpl<DotDebugLocEntry>::iterator
+         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+       I != E; ++I, ++index) {
+    DotDebugLocEntry &Entry = *I;
+    if (Entry.isMerged()) continue;
+    if (Entry.isEmpty()) {
+      Asm->OutStreamer.EmitIntValue(0, Size);
+      Asm->OutStreamer.EmitIntValue(0, Size);
+      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
+    } else {
+      Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size);
+      Asm->OutStreamer.EmitSymbolValue(Entry.End, Size);
+      DIVariable DV(Entry.Variable);
+      Asm->OutStreamer.AddComment("Loc expr size");
+      MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
+      MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
+      Asm->EmitLabelDifference(end, begin, 2);
+      Asm->OutStreamer.EmitLabel(begin);
+      if (Entry.isInt()) {
+        DIBasicType BTy(DV.getType());
+        if (BTy.Verify() &&
+            (BTy.getEncoding() == dwarf::DW_ATE_signed
+             || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
+          Asm->OutStreamer.AddComment("DW_OP_consts");
+          Asm->EmitInt8(dwarf::DW_OP_consts);
+          Asm->EmitSLEB128(Entry.getInt());
+        } else {
+          Asm->OutStreamer.AddComment("DW_OP_constu");
+          Asm->EmitInt8(dwarf::DW_OP_constu);
+          Asm->EmitULEB128(Entry.getInt());
+        }
+      } else if (Entry.isLocation()) {
+        if (!DV.hasComplexAddress())
+          // Regular entry.
+          Asm->EmitDwarfRegOp(Entry.Loc);
+        else {
+          // Complex address entry.
+          unsigned N = DV.getNumAddrElements();
+          unsigned i = 0;
+          if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+            if (Entry.Loc.getOffset()) {
+              i = 2;
+              Asm->EmitDwarfRegOp(Entry.Loc);
+              Asm->OutStreamer.AddComment("DW_OP_deref");
+              Asm->EmitInt8(dwarf::DW_OP_deref);
+              Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
+              Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+              Asm->EmitSLEB128(DV.getAddrElement(1));
+            } else {
+              // If the first address element is OpPlus, then emit
+              // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
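+              // e.g. for address elements {OpPlus, 8} and a register-only
+              // location, this folds the offset into the register operand,
+              // emitting DW_OP_breg<reg> 8 rather than DW_OP_reg<reg>
+              // followed by DW_OP_plus_uconst 8.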
+              MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+              Asm->EmitDwarfRegOp(Loc);
+              i = 2;
+            }
+          } else {
+            Asm->EmitDwarfRegOp(Entry.Loc);
+          }
+
+          // Emit remaining complex address elements.
+          for (; i < N; ++i) {
+            uint64_t Element = DV.getAddrElement(i);
+            if (Element == DIBuilder::OpPlus) {
+              Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+              Asm->EmitULEB128(DV.getAddrElement(++i));
+            } else if (Element == DIBuilder::OpDeref) {
+              if (!Entry.Loc.isReg())
+                Asm->EmitInt8(dwarf::DW_OP_deref);
+            } else
+              llvm_unreachable("unknown Opcode found in complex address");
+          }
+        }
+      }
+      // else ... ignore constant fp. There is no good way to
+      // represent them here in dwarf.
+      Asm->OutStreamer.EmitLabel(end);
+    }
+  }
+}
+
+// Emit the debug aranges section.
+void DwarfDebug::emitDebugARanges() {
+  // Start the dwarf aranges section.
+  Asm->OutStreamer.SwitchSection(
+    Asm->getObjFileLowering().getDwarfARangesSection());
+}
+
+// Emit address range symbols into a debug ranges section.
+void DwarfDebug::emitDebugRanges() {
+  // Start the dwarf ranges section.
+  Asm->OutStreamer.SwitchSection(
+    Asm->getObjFileLowering().getDwarfRangesSection());
+  unsigned char Size = Asm->getDataLayout().getPointerSize();
+  for (SmallVectorImpl<const MCSymbol *>::iterator
+         I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
+       I != E; ++I) {
+    if (*I)
+      Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size);
+    else
+      Asm->OutStreamer.EmitIntValue(0, Size);
+  }
+}
+
+// Emit the debug macinfo section.
+void DwarfDebug::emitDebugMacInfo() {
+  if (const MCSection *LineInfo =
+        Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
+    // Start the dwarf macinfo section.
+    Asm->OutStreamer.SwitchSection(LineInfo);
+  }
+}
+
+// Emit inline info using the following format.
+// Section Header:
+// 1. length of section
+// 2. Dwarf version number
+// 3. address size.
+//
+// Entries (one "entry" for each function that was inlined):
+//
+// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+//    otherwise offset into __debug_str for regular function name.
+// 2. offset into __debug_str section for regular function name.
+// 3. an unsigned LEB128 number indicating the number of distinct inlining
+//    instances for the function.
+//
+// The rest of the entry consists of a {die_offset, low_pc} pair for each
+// inlined instance; the die_offset points to the inlined_subroutine die in the
+// __debug_info section, and the low_pc is the starting address for the
+// inlining instance.
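+//
+// For example, a function "foo" inlined twice contributes a single entry:
+// the string offset of its linkage name (or of "foo" when no linkage name
+// exists), the string offset of "foo", the ULEB128 count 2, and then two
+// {die_offset, low_pc} pairs, one per inlined instance.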
+void DwarfDebug::emitDebugInlineInfo() {
+  if (!Asm->MAI->doesDwarfUseInlineInfoSection())
+    return;
+
+  if (!FirstCU)
+    return;
+
+  Asm->OutStreamer.SwitchSection(
+    Asm->getObjFileLowering().getDwarfDebugInlineSection());
+
+  Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1),
+                           Asm->GetTempSymbol("debug_inlined_begin", 1), 4);
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));
+
+  Asm->OutStreamer.AddComment("Dwarf Version");
+  Asm->EmitInt16(dwarf::DWARF_VERSION);
+  Asm->OutStreamer.AddComment("Address Size (in bytes)");
+  Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+
+  for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(),
+         E = InlinedSPNodes.end(); I != E; ++I) {
+
+    const MDNode *Node = *I;
+    DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+      = InlineInfo.find(Node);
+    SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
+    DISubprogram SP(Node);
+    StringRef LName = SP.getLinkageName();
+    StringRef Name = SP.getName();
+
+    Asm->OutStreamer.AddComment("MIPS linkage name");
+    if (LName.empty())
+      Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
+                             DwarfStrSectionSym);
+    else
+      Asm->EmitSectionOffset(InfoHolder
+                             .getStringPoolEntry(getRealLinkageName(LName)),
+                             DwarfStrSectionSym);
+
+    Asm->OutStreamer.AddComment("Function name");
+    Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
+                           DwarfStrSectionSym);
+    Asm->EmitULEB128(Labels.size(), "Inline count");
+
+    for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(),
+           LE = Labels.end(); LI != LE; ++LI) {
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(LI->second->getOffset());
+
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
+      Asm->OutStreamer.EmitSymbolValue(LI->first,
+                                       Asm->getDataLayout().getPointerSize());
+    }
+  }
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
+}
+
+// DWARF5 Experimental Separate Dwarf emitters.
+
+// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
+// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
+// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present,
+// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa.
+CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
+  DICompileUnit DIUnit(N);
+  CompilationDir = DIUnit.getDirectory();
+
+  DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+  CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+                                       DIUnit.getLanguage(), Die, Asm,
+                                       this, &SkeletonHolder);
+
+  NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
+                        DIUnit.getSplitDebugFilename());
+
+  // This should be a unique identifier when we want to build .dwp files.
+  NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+
+  // FIXME: The addr base should be relative for each compile unit, however,
+  // this one is going to be 0 anyhow.
+  NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
+
+  // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
+  // into an entity. We're using 0, or a NULL label for this.
+  NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+
+  // DW_AT_stmt_list is an offset of the line number information for this
+  // compile unit in the debug_line section.
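+  // When cross-section relocations are available we can point at the
+  // section_line label directly; otherwise a literal 0 offset is emitted.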
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, + DwarfLineSectionSym); + else + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0); + + if (!CompilationDir.empty()) + NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + SkeletonHolder.addUnit(NewCU); + SkeletonCUs.push_back(NewCU); + + return NewCU; +} + +void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) { + assert(useSplitDwarf() && "No split dwarf debug info?"); + emitAbbrevs(Section, &SkeletonAbbrevs); +} + +// Emit the .debug_info.dwo section for separated dwarf. This contains the +// compile units that would normally be in debug_info. +void DwarfDebug::emitDebugInfoDWO() { + assert(useSplitDwarf() && "No split dwarf debug info?"); + InfoHolder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoDWOSection(), + Asm->getObjFileLowering().getDwarfAbbrevDWOSection(), + DwarfAbbrevDWOSectionSym); +} + +// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the +// abbreviations for the .debug_info.dwo section. +void DwarfDebug::emitDebugAbbrevDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection(), + &Abbreviations); +} + +// Emit the .debug_str.dwo section for separated dwarf. This contains the +// string section and is identical in format to traditional .debug_str +// sections. +void DwarfDebug::emitDebugStrDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + const MCSection *OffSec = Asm->getObjFileLowering() + .getDwarfStrOffDWOSection(); + const MCSymbol *StrSym = DwarfStrSectionSym; + InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), + OffSec, StrSym); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h new file mode 100644 index 0000000..81e345e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -0,0 +1,649 @@ +//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__ +#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ + +#include "DIE.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/DebugInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/DebugLoc.h" + +namespace llvm { + +class CompileUnit; +class ConstantInt; +class ConstantFP; +class DbgVariable; +class MachineFrameInfo; +class MachineModuleInfo; +class MachineOperand; +class MCAsmInfo; +class DIEAbbrev; +class DIE; +class DIEBlock; +class DIEEntry; +class DwarfDebug; + +//===----------------------------------------------------------------------===// +/// \brief This class is used to record source line correspondence. +class SrcLineInfo { + unsigned Line; // Source line number. + unsigned Column; // Source column. 
+  unsigned SourceID;                 // Source ID number.
+  MCSymbol *Label;                   // Label emitted at this source line.
+public:
+  SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
+    : Line(L), Column(C), SourceID(S), Label(label) {}
+
+  // Accessors
+  unsigned getLine() const { return Line; }
+  unsigned getColumn() const { return Column; }
+  unsigned getSourceID() const { return SourceID; }
+  MCSymbol *getLabel() const { return Label; }
+};
+
+/// \brief This struct describes location entries emitted in the .debug_loc
+/// section.
+typedef struct DotDebugLocEntry {
+  const MCSymbol *Begin;
+  const MCSymbol *End;
+  MachineLocation Loc;
+  const MDNode *Variable;
+  bool Merged;
+  bool Constant;
+  enum EntryType {
+    E_Location,
+    E_Integer,
+    E_ConstantFP,
+    E_ConstantInt
+  };
+  enum EntryType EntryKind;
+
+  union {
+    int64_t Int;
+    const ConstantFP *CFP;
+    const ConstantInt *CIP;
+  } Constants;
+  DotDebugLocEntry()
+    : Begin(0), End(0), Variable(0), Merged(false),
+      Constant(false) { Constants.Int = 0;}
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
+                   const MDNode *V)
+    : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
+      Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
+    : Begin(B), End(E), Variable(0), Merged(false),
+      Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
+    : Begin(B), End(E), Variable(0), Merged(false),
+      Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
+                   const ConstantInt *IPtr)
+    : Begin(B), End(E), Variable(0), Merged(false),
+      Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
+
+  /// \brief Empty entries are also used as a trigger to emit a temp label.
+  /// Such labels are referenced to find the debug_loc offset for a given DIE.
+  bool isEmpty() { return Begin == 0 && End == 0; }
+  bool isMerged() { return Merged; }
+  void Merge(DotDebugLocEntry *Next) {
+    if (!(Begin && Loc == Next->Loc && End == Next->Begin))
+      return;
+    Next->Begin = Begin;
+    Merged = true;
+  }
+  bool isLocation() const { return EntryKind == E_Location; }
+  bool isInt() const { return EntryKind == E_Integer; }
+  bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+  bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+  int64_t getInt() { return Constants.Int; }
+  const ConstantFP *getConstantFP() { return Constants.CFP; }
+  const ConstantInt *getConstantInt() { return Constants.CIP; }
+} DotDebugLocEntry;
+
+//===----------------------------------------------------------------------===//
+/// \brief This class is used to track local variable information.
+class DbgVariable {
+  DIVariable Var;                    // Variable Descriptor.
+  DIE *TheDIE;                       // Variable DIE.
+  unsigned DotDebugLocOffset;        // Offset in DotDebugLocEntries.
+  DbgVariable *AbsVar;               // Corresponding Abstract variable, if any.
+  const MachineInstr *MInsn;         // DBG_VALUE instruction of the variable.
+  int FrameIndex;
+public:
+  // AbsVar may be NULL.
+  DbgVariable(DIVariable V, DbgVariable *AV)
+    : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
+      FrameIndex(~0) {}
+
+  // Accessors.
+ DIVariable getVariable() const { return Var; } + void setDIE(DIE *D) { TheDIE = D; } + DIE *getDIE() const { return TheDIE; } + void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } + unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } + StringRef getName() const { return Var.getName(); } + DbgVariable *getAbstractVariable() const { return AbsVar; } + const MachineInstr *getMInsn() const { return MInsn; } + void setMInsn(const MachineInstr *M) { MInsn = M; } + int getFrameIndex() const { return FrameIndex; } + void setFrameIndex(int FI) { FrameIndex = FI; } + // Translate tag to proper Dwarf tag. + unsigned getTag() const { + if (Var.getTag() == dwarf::DW_TAG_arg_variable) + return dwarf::DW_TAG_formal_parameter; + + return dwarf::DW_TAG_variable; + } + /// \brief Return true if DbgVariable is artificial. + bool isArtificial() const { + if (Var.isArtificial()) + return true; + if (getType().isArtificial()) + return true; + return false; + } + + bool isObjectPointer() const { + if (Var.isObjectPointer()) + return true; + if (getType().isObjectPointer()) + return true; + return false; + } + + bool variableHasComplexAddress() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.hasComplexAddress(); + } + bool isBlockByrefVariable() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.isBlockByrefVariable(); + } + unsigned getNumAddrElements() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.getNumAddrElements(); + } + uint64_t getAddrElement(unsigned i) const { + return Var.getAddrElement(i); + } + DIType getType() const; +}; + + +// A String->Symbol mapping of strings used by indirect +// references. +typedef StringMap<std::pair<MCSymbol*, unsigned>, + BumpPtrAllocator&> StrPool; + +// A Symbol->pair<Symbol, unsigned> mapping of addresses used by indirect +// references. +typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool; + +/// \brief Collects and handles information specific to a particular +/// collection of units. +class DwarfUnits { + // Target of Dwarf emission, used for sizing of abbreviations. + AsmPrinter *Asm; + + // Used to uniquely define abbreviations. + FoldingSet<DIEAbbrev> *AbbreviationsSet; + + // A list of all the unique abbreviations in use. + std::vector<DIEAbbrev *> *Abbreviations; + + // A pointer to all units in the section. + SmallVector<CompileUnit *, 1> CUs; + + // Collection of strings for this unit and assorted symbols. + StrPool StringPool; + unsigned NextStringPoolNumber; + std::string StringPref; + + // Collection of addresses for this unit and assorted labels. + AddrPool AddressPool; + unsigned NextAddrPoolNumber; + +public: + DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS, + std::vector<DIEAbbrev *> *A, const char *Pref, + BumpPtrAllocator &DA) : + Asm(AP), AbbreviationsSet(AS), Abbreviations(A), + StringPool(DA), NextStringPoolNumber(0), StringPref(Pref), + AddressPool(), NextAddrPoolNumber(0) {} + + /// \brief Compute the size and offset of a DIE given an incoming Offset. + unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); + + /// \brief Compute the size and offset of all the DIEs. + void computeSizeAndOffsets(); + + /// \brief Define a unique number for the abbreviation. + void assignAbbrevNumber(DIEAbbrev &Abbrev); + + /// \brief Add a unit to the list of CUs. 
+ void addUnit(CompileUnit *CU) { CUs.push_back(CU); } + + /// \brief Emit all of the units to the section listed with the given + /// abbreviation section. + void emitUnits(DwarfDebug *, const MCSection *, const MCSection *, + const MCSymbol *); + + /// \brief Emit all of the strings to the section given. + void emitStrings(const MCSection *, const MCSection *, const MCSymbol *); + + /// \brief Emit all of the addresses to the section given. + void emitAddresses(const MCSection *); + + /// \brief Returns the entry into the start of the pool. + MCSymbol *getStringPoolSym(); + + /// \brief Returns an entry into the string pool with the given + /// string text. + MCSymbol *getStringPoolEntry(StringRef Str); + + /// \brief Returns the index into the string pool with the given + /// string text. + unsigned getStringPoolIndex(StringRef Str); + + /// \brief Returns the string pool. + StrPool *getStringPool() { return &StringPool; } + + /// \brief Returns the index into the address pool with the given + /// label/symbol. + unsigned getAddrPoolIndex(MCSymbol *); + + /// \brief Returns the address pool. + AddrPool *getAddrPool() { return &AddressPool; } + + /// \brief for a given compile unit DIE, returns offset from beginning of + /// debug info. + unsigned getCUOffset(DIE *Die); +}; + +/// \brief Collects and handles dwarf debug information. +class DwarfDebug { + // Target of Dwarf emission. + AsmPrinter *Asm; + + // Collected machine module information. + MachineModuleInfo *MMI; + + // All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + + //===--------------------------------------------------------------------===// + // Attribute used to construct specific Dwarf sections. + // + + CompileUnit *FirstCU; + + // Maps MDNode with its corresponding CompileUnit. + DenseMap <const MDNode *, CompileUnit *> CUMap; + + // Maps subprogram MDNode with its corresponding CompileUnit. + DenseMap <const MDNode *, CompileUnit *> SPMap; + + // Used to uniquely define abbreviations. + FoldingSet<DIEAbbrev> AbbreviationsSet; + + // A list of all the unique abbreviations in use. + std::vector<DIEAbbrev *> Abbreviations; + + // Stores the current file ID for a given compile unit. + DenseMap <unsigned, unsigned> FileIDCUMap; + // Source id map, i.e. CUID, source filename and directory, + // separated by a zero byte, mapped to a unique id. + StringMap<unsigned, BumpPtrAllocator&> SourceIdMap; + + // Provides a unique id per text section. + SetVector<const MCSection*> SectionMap; + + // List of Arguments (DbgValues) for current function. + SmallVector<DbgVariable *, 8> CurrentFnArguments; + + LexicalScopes LScopes; + + // Collection of abstract subprogram DIEs. + DenseMap<const MDNode *, DIE *> AbstractSPDies; + + // Collection of dbg variables of a scope. + DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables; + + // Collection of abstract variables. + DenseMap<const MDNode *, DbgVariable *> AbstractVariables; + + // Collection of DotDebugLocEntry. + SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries; + + // Collection of subprogram DIEs that are marked (at the end of the module) + // as DW_AT_inline. + SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; + + // Keep track of inlined functions and their location. This + // information is used to populate the debug_inlined section. 
+  typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
+  DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+  SmallVector<const MDNode *, 4> InlinedSPNodes;
+
+  // This is a collection of subprogram MDNodes that are processed to
+  // create DIEs.
+  SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
+  // Maps an instruction to the label emitted before it.
+  DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+  // Maps an instruction to the label emitted after it.
+  DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+  // Every user variable mentioned by a DBG_VALUE instruction in order of
+  // appearance.
+  SmallVector<const MDNode*, 8> UserVariables;
+
+  // For each user variable, keep a list of DBG_VALUE instructions in order.
+  // The list can also contain normal instructions that clobber the previous
+  // DBG_VALUE.
+  typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
+    DbgValueHistoryMap;
+  DbgValueHistoryMap DbgValues;
+
+  SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
+
+  // Previous instruction's location information. This is used to determine
+  // label location to indicate scope boundaries in dwarf debug info.
+  DebugLoc PrevInstLoc;
+  MCSymbol *PrevLabel;
+
+  // This location indicates end of function prologue and beginning of function
+  // body.
+  DebugLoc PrologEndLoc;
+
+  struct FunctionDebugFrameInfo {
+    unsigned Number;
+    std::vector<MachineMove> Moves;
+
+    FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+      : Number(Num), Moves(M) {}
+  };
+
+  std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+  // Section Symbols: these are assembler temporary labels that are emitted at
+  // the beginning of each supported dwarf section. These are used to form
+  // section offsets and are created by emitSectionLabels.
+  MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
+  MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
+  MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym;
+  MCSymbol *FunctionBeginSym, *FunctionEndSym;
+  MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
+
+  // As an optimization, there is no need to emit an entry in the directory
+  // table for the same directory as DW_AT_comp_dir.
+  StringRef CompilationDir;
+
+  // Counter for assigning globally unique IDs for CUs.
+  unsigned GlobalCUIndexCount;
+
+  // Holder for the file specific debug information.
+  DwarfUnits InfoHolder;
+
+  // Holders for the various debug information flags that we might need to
+  // have exposed. See accessor functions below for description.
+
+  // Whether or not we're emitting info for older versions of gdb on darwin.
+  bool IsDarwinGDBCompat;
+
+  // DWARF5 Experimental Options
+  bool HasDwarfAccelTables;
+  bool HasSplitDwarf;
+
+  // Separated Dwarf Variables
+  // In general these will all be for bits that are left in the
+  // original object file, rather than things that are meant
+  // to be in the .dwo sections.
+
+  // The CUs left in the original object file for separated debug info.
+  SmallVector<CompileUnit *, 1> SkeletonCUs;
+
+  // Used to uniquely define abbreviations for the skeleton emission.
+  FoldingSet<DIEAbbrev> SkeletonAbbrevSet;
+
+  // A list of all the unique abbreviations in use.
+  std::vector<DIEAbbrev *> SkeletonAbbrevs;
+
+  // Holder for the skeleton information.
+  DwarfUnits SkeletonHolder;
+
+private:
+
+  void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+
+  /// \brief Find abstract variable associated with Var.
+ DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc); + + /// \brief Find DIE for the given subprogram and attach appropriate + /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global + /// variables in this scope then create and insert DIEs for these + /// variables. + DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode); + + /// \brief Construct new DW_TAG_lexical_block for this scope and + /// attach DW_AT_low_pc/DW_AT_high_pc labels. + DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + + /// \brief This scope represents inlined body of a function. Construct + /// DIE to represent this concrete inlined copy of the function. + DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + + /// \brief Construct a DIE for this scope. + DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + + /// \brief Emit initial Dwarf sections with a label at the start of each one. + void emitSectionLabels(); + + /// \brief Compute the size and offset of a DIE given an incoming Offset. + unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); + + /// \brief Compute the size and offset of all the DIEs. + void computeSizeAndOffsets(); + + /// \brief Attach DW_AT_inline attribute with inlined subprogram DIEs. + void computeInlinedDIEs(); + + /// \brief Collect info for variables that were optimized out. + void collectDeadVariables(); + + /// \brief Finish off debug information after all functions have been + /// processed. + void finalizeModuleInfo(); + + /// \brief Emit labels to close any remaining sections that have been left + /// open. + void endSections(); + + /// \brief Emit a set of abbreviations to the specific section. + void emitAbbrevs(const MCSection *, std::vector<DIEAbbrev*> *); + + /// \brief Emit the debug info section. + void emitDebugInfo(); + + /// \brief Emit the abbreviation section. + void emitAbbreviations(); + + /// \brief Emit the last address of the section and the end of + /// the line matrix. + void emitEndOfLineMatrix(unsigned SectionEnd); + + /// \brief Emit visible names into a hashed accelerator table section. + void emitAccelNames(); + + /// \brief Emit objective C classes and categories into a hashed + /// accelerator table section. + void emitAccelObjC(); + + /// \brief Emit namespace dies into a hashed accelerator table. + void emitAccelNamespaces(); + + /// \brief Emit type dies into a hashed accelerator table. + void emitAccelTypes(); + + /// \brief Emit visible names into a debug pubnames section. + void emitDebugPubnames(); + + /// \brief Emit visible types into a debug pubtypes section. + void emitDebugPubTypes(); + + /// \brief Emit visible names into a debug str section. + void emitDebugStr(); + + /// \brief Emit visible names into a debug loc section. + void emitDebugLoc(); + + /// \brief Emit visible names into a debug aranges section. + void emitDebugARanges(); + + /// \brief Emit visible names into a debug ranges section. + void emitDebugRanges(); + + /// \brief Emit visible names into a debug macinfo section. + void emitDebugMacInfo(); + + /// \brief Emit inline info using custom format. + void emitDebugInlineInfo(); + + /// DWARF 5 Experimental Split Dwarf Emitters + + /// \brief Construct the split debug info compile unit for the debug info + /// section. + CompileUnit *constructSkeletonCU(const MDNode *); + + /// \brief Emit the local split abbreviations. + void emitSkeletonAbbrevs(const MCSection *); + + /// \brief Emit the debug info dwo section. 
+  void emitDebugInfoDWO();
+
+  /// \brief Emit the debug abbrev dwo section.
+  void emitDebugAbbrevDWO();
+
+  /// \brief Emit the debug str dwo section.
+  void emitDebugStrDWO();
+
+  /// \brief Create new CompileUnit for the given metadata node with tag
+  /// DW_TAG_compile_unit.
+  CompileUnit *constructCompileUnit(const MDNode *N);
+
+  /// \brief Construct subprogram DIE.
+  void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
+
+  /// \brief Register a source line with debug info. Emits the line table
+  /// entry that provides correspondence to the source line list.
+  void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+                        unsigned Flags);
+
+  /// \brief Identify instructions that mark the beginning or the
+  /// end of a scope.
+  void identifyScopeMarkers();
+
+  /// \brief If Var is a current function argument, add it to the
+  /// CurrentFnArguments list.
+  bool addCurrentFnArgument(const MachineFunction *MF,
+                            DbgVariable *Var, LexicalScope *Scope);
+
+  /// \brief Populate LexicalScope entries with variables' info.
+  void collectVariableInfo(const MachineFunction *,
+                           SmallPtrSet<const MDNode *, 16> &ProcessedVars);
+
+  /// \brief Collect variable information from the side table maintained
+  /// by MMI.
+  void collectVariableInfoFromMMITable(const MachineFunction *MF,
+                                       SmallPtrSet<const MDNode *, 16> &P);
+
+  /// \brief Ensure that a label will be emitted before MI.
+  void requestLabelBeforeInsn(const MachineInstr *MI) {
+    LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+  }
+
+  /// \brief Return Label preceding the instruction.
+  MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+  /// \brief Ensure that a label will be emitted after MI.
+  void requestLabelAfterInsn(const MachineInstr *MI) {
+    LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+  }
+
+  /// \brief Return Label immediately following the instruction.
+  MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfDebug(AsmPrinter *A, Module *M);
+  ~DwarfDebug();
+
+  /// \brief Emit all Dwarf sections that should come prior to the
+  /// content.
+  void beginModule();
+
+  /// \brief Emit all Dwarf sections that should come after the content.
+  void endModule();
+
+  /// \brief Gather pre-function debug information.
+  void beginFunction(const MachineFunction *MF);
+
+  /// \brief Gather and emit post-function debug information.
+  void endFunction(const MachineFunction *MF);
+
+  /// \brief Process beginning of an instruction.
+  void beginInstruction(const MachineInstr *MI);
+
+  /// \brief Process end of an instruction.
+  void endInstruction(const MachineInstr *MI);
+
+  /// \brief Look up the source id for the given source file and directory
+  /// names. If none currently exists, create a new id and insert it in the
+  /// SourceIds map.
+  unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName,
+                               unsigned CUID);
+
+  /// \brief Recursively emits a debug information entry.
+  void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs);
+
+  /// \brief Returns whether or not to limit some of our debug
+  /// output to the limitations of darwin gdb.
+  bool useDarwinGDBCompat() { return IsDarwinGDBCompat; }
+
+  // Experimental DWARF5 features.
+
+  /// \brief Returns whether or not to emit tables that dwarf consumers can
+  /// use to accelerate lookup.
+ bool useDwarfAccelTables() { return HasDwarfAccelTables; } + + /// \brief Returns whether or not to change the current debug info for the + /// split dwarf proposal support. + bool useSplitDwarf() { return HasSplitDwarf; } +}; +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp new file mode 100644 index 0000000..7133458 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -0,0 +1,736 @@ +//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +DwarfException::DwarfException(AsmPrinter *A) + : Asm(A), MMI(Asm->MMI) {} + +DwarfException::~DwarfException() {} + +/// SharedTypeIds - How many leading type ids two landing pads have in common. +unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, + const LandingPadInfo *R) { + const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; + unsigned LSize = LIds.size(), RSize = RIds.size(); + unsigned MinSize = LSize < RSize ? LSize : RSize; + unsigned Count = 0; + + for (; Count != MinSize; ++Count) + if (LIds[Count] != RIds[Count]) + return Count; + + return Count; +} + +/// PadLT - Order landing pads lexicographically by type id. +bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) { + const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; + unsigned LSize = LIds.size(), RSize = RIds.size(); + unsigned MinSize = LSize < RSize ? LSize : RSize; + + for (unsigned i = 0; i != MinSize; ++i) + if (LIds[i] != RIds[i]) + return LIds[i] < RIds[i]; + + return LSize < RSize; +} + +/// ComputeActionsTable - Compute the actions table and gather the first action +/// index for each landing pad site. +unsigned DwarfException:: +ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions) { + + // The action table follows the call-site table in the LSDA. The individual + // records are of two types: + // + // * Catch clause + // * Exception specification + // + // The two record kinds have the same format, with only small differences. 
+  // They are distinguished by the "switch value" field: Catch clauses
+  // (TypeInfos) have strictly positive switch values, and exception
+  // specifications (FilterIds) have strictly negative switch values. Value 0
+  // indicates a catch-all clause.
+  //
+  // Negative type IDs index into FilterIds. Positive type IDs index into
+  // TypeInfos. The value written for a positive type ID is just the type ID
+  // itself. For a negative type ID, however, the value written is the
+  // (negative) byte offset of the corresponding FilterIds entry. The byte
+  // offset is usually equal to the type ID (because the FilterIds entries are
+  // written using a variable width encoding, which outputs one byte per entry
+  // as long as the value written is not too large) but can differ. This kind
+  // of complication does not occur for positive type IDs because type infos
+  // are output using a fixed width encoding. FilterOffsets[i] holds the byte
+  // offset corresponding to FilterIds[i].
+
+  const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+  SmallVector<int, 16> FilterOffsets;
+  FilterOffsets.reserve(FilterIds.size());
+  int Offset = -1;
+
+  for (std::vector<unsigned>::const_iterator
+         I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+    FilterOffsets.push_back(Offset);
+    Offset -= MCAsmInfo::getULEB128Size(*I);
+  }
+
+  FirstActions.reserve(LandingPads.size());
+
+  int FirstAction = 0;
+  unsigned SizeActions = 0;
+  const LandingPadInfo *PrevLPI = 0;
+
+  for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+         I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+    const LandingPadInfo *LPI = *I;
+    const std::vector<int> &TypeIds = LPI->TypeIds;
+    unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+    unsigned SizeSiteActions = 0;
+
+    if (NumShared < TypeIds.size()) {
+      unsigned SizeAction = 0;
+      unsigned PrevAction = (unsigned)-1;
+
+      if (NumShared) {
+        unsigned SizePrevIds = PrevLPI->TypeIds.size();
+        assert(Actions.size());
+        PrevAction = Actions.size() - 1;
+        SizeAction =
+          MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) +
+          MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+
+        for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+          assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
+          SizeAction -=
+            MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+          SizeAction += -Actions[PrevAction].NextAction;
+          PrevAction = Actions[PrevAction].Previous;
+        }
+      }
+
+      // Compute the actions.
+      for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+        int TypeID = TypeIds[J];
+        assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+        int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+        unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+        int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+        SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+        SizeSiteActions += SizeAction;
+
+        ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+        Actions.push_back(Action);
+        PrevAction = Actions.size() - 1;
+      }
+
+      // Record the first action of the landing pad site.
+      FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+    } // else identical - re-use previous FirstAction
+
+    // Information used when creating the call-site table. The action record
+    // field of the call site record is the offset of the first associated
+    // action record, relative to the start of the actions table.
This value is
+    // biased by 1 (1 indicating the start of the actions table), and 0
+    // indicates that there are no actions.
+    FirstActions.push_back(FirstAction);
+
+    // Compute this site's contribution to size.
+    SizeActions += SizeSiteActions;
+
+    PrevLPI = LPI;
+  }
+
+  return SizeActions;
+}
+
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+  assert(MI->isCall() && "This should be a call instruction!");
+
+  bool MarkedNoUnwind = false;
+  bool SawFunc = false;
+
+  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI->getOperand(I);
+
+    if (!MO.isGlobal()) continue;
+
+    const Function *F = dyn_cast<Function>(MO.getGlobal());
+    if (F == 0) continue;
+
+    if (SawFunc) {
+      // Be conservative. If we have more than one function operand for this
+      // call, then we can't make the assumption that it's the callee and
+      // not a parameter to the call.
+      //
+      // FIXME: Determine if there's a way to say that `F' is the callee or
+      // parameter.
+      MarkedNoUnwind = false;
+      break;
+    }
+
+    MarkedNoUnwind = F->doesNotThrow();
+    SawFunc = true;
+  }
+
+  return MarkedNoUnwind;
+}
+
+/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
+/// has a try-range containing the call, a non-zero landing pad, and an
+/// appropriate action. The entry for an ordinary call has a try-range
+/// containing the call and zero for the landing pad and the action. Calls
+/// marked 'nounwind' have no entry and must not be contained in the try-range
+/// of any entry - they form gaps in the table. Entries must be ordered by
+/// try-range address.
+void DwarfException::
+ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+                     const RangeMapType &PadMap,
+                     const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+                     const SmallVectorImpl<unsigned> &FirstActions) {
+  // The end label of the previous invoke or nounwind try-range.
+  MCSymbol *LastLabel = 0;
+
+  // Whether there is a potentially throwing instruction (currently this means
+  // an ordinary call) between the end of the previous try-range and now.
+  bool SawPotentiallyThrowing = false;
+
+  // Whether the last CallSite entry was for an invoke.
+  bool PreviousIsInvoke = false;
+
+  // Visit all instructions in order of address.
+  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+         MI != E; ++MI) {
+      if (!MI->isLabel()) {
+        if (MI->isCall())
+          SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+        continue;
+      }
+
+      // End of the previous try-range?
+      MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+      if (BeginLabel == LastLabel)
+        SawPotentiallyThrowing = false;
+
+      // Beginning of a new try-range?
+      RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+      if (L == PadMap.end())
+        // Nope, it was just some random label.
+        continue;
+
+      const PadRange &P = L->second;
+      const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+      assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+             "Inconsistent landing pad map!");
+
+      // For Dwarf exception handling (SjLj handling doesn't use this): if some
+      // instruction between the previous try-range and this one may throw,
+      // create a call-site entry with no landing pad for the region between
+      // the try-ranges.
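+      // Such a gap entry covers [LastLabel, BeginLabel) with a null landing
+      // pad and action 0, telling the personality to keep unwinding there.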
+      if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+        CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+        CallSites.push_back(Site);
+        PreviousIsInvoke = false;
+      }
+
+      LastLabel = LandingPad->EndLabels[P.RangeIndex];
+      assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+      if (!LandingPad->LandingPadLabel) {
+        // Create a gap.
+        PreviousIsInvoke = false;
+      } else {
+        // This try-range is for an invoke.
+        CallSiteEntry Site = {
+          BeginLabel,
+          LastLabel,
+          LandingPad->LandingPadLabel,
+          FirstActions[P.PadIndex]
+        };
+
+        // Try to merge with the previous call-site. SjLj doesn't do this.
+        if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
+          CallSiteEntry &Prev = CallSites.back();
+          if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+            // Extend the range of the previous entry.
+            Prev.EndLabel = Site.EndLabel;
+            continue;
+          }
+        }
+
+        // Otherwise, create a new call-site.
+        if (Asm->MAI->isExceptionHandlingDwarf())
+          CallSites.push_back(Site);
+        else {
+          // SjLj EH must maintain the call sites in the order assigned
+          // to them by the SjLjPrepare pass.
+          unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+          if (CallSites.size() < SiteNo)
+            CallSites.resize(SiteNo);
+          CallSites[SiteNo - 1] = Site;
+        }
+        PreviousIsInvoke = true;
+      }
+    }
+  }
+
+  // If some instruction between the previous try-range and the end of the
+  // function may throw, create a call-site entry with no landing pad for the
+  // region following the try-range.
+  if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+    CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+    CallSites.push_back(Site);
+  }
+}
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts
+/// are easy. First there is a header which describes the location and
+/// organization of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+///    the try. In our case it's an accumulation of the ranges covered by the
+///    invokes in the try. There is also a reference to the landing pad that
+///    handles the exception once processed. Finally, there is an index into
+///    the actions table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+///    action offset. Starting with the action index from the landing pad
+///    site, each type ID is checked for a match to the current exception. If
+///    it matches then the exception and type id are passed on to the landing
+///    pad. Otherwise the next action is looked up. This chain is terminated
+///    with a next action of zero. If no type id is found then the frame is
+///    unwound and handling continues.
+/// 3. The type ID table contains references to all the C++ typeinfos for all
+///    catches in the function. This table is reverse indexed, base 1.
+void DwarfException::EmitExceptionTable() {
+  const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+  const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+  // Sort the landing pads in order of their type ids. This is used to fold
+  // duplicate actions.
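+  //
+  // Example of the folding this enables (hypothetical type ids): two landing
+  // pads whose TypeIds are {1, 2} and {1, 2, 3} sort next to each other, so
+  // the second shares the two leading action records already built for the
+  // first and only adds a record for type id 3 (see SharedTypeIds and
+  // ComputeActionsTable above).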
+  SmallVector<const LandingPadInfo *, 64> LandingPads;
+  LandingPads.reserve(PadInfos.size());
+
+  for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+    LandingPads.push_back(&PadInfos[i]);
+
+  std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+  // Compute the actions table and gather the first action index for each
+  // landing pad site.
+  SmallVector<ActionEntry, 32> Actions;
+  SmallVector<unsigned, 64> FirstActions;
+  unsigned SizeActions =
+    ComputeActionsTable(LandingPads, Actions, FirstActions);
+
+  // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+  // by try-range labels when lowered). Ordinary calls do not, so appropriate
+  // try-ranges for them need to be deduced when using DWARF exception
+  // handling.
+  RangeMapType PadMap;
+  for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+    const LandingPadInfo *LandingPad = LandingPads[i];
+    for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+      MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+      assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+      PadRange P = { i, j };
+      PadMap[BeginLabel] = P;
+    }
+  }
+
+  // Compute the call-site table.
+  SmallVector<CallSiteEntry, 64> CallSites;
+  ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+
+  // Final tallies.
+
+  // Call sites.
+  bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+  bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+
+  unsigned CallSiteTableLength;
+  if (IsSJLJ)
+    CallSiteTableLength = 0;
+  else {
+    unsigned SiteStartSize  = 4; // dwarf::DW_EH_PE_udata4
+    unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
+    unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
+    CallSiteTableLength =
+      CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
+  }
+
+  for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+    CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+    if (IsSJLJ)
+      CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
+  }
+
+  // Type infos.
+  const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+  unsigned TTypeEncoding;
+  unsigned TypeFormatSize;
+
+  if (!HaveTTData) {
+    // For SjLj exceptions, if there is no TypeInfo, then we just explicitly
+    // say that we're omitting that bit.
+    TTypeEncoding = dwarf::DW_EH_PE_omit;
+    // dwarf::DW_EH_PE_absptr
+    TypeFormatSize = Asm->getDataLayout().getPointerSize();
+  } else {
+    // Okay, we have actual filters or typeinfos to emit. As such, we need to
+    // pick a type encoding for them. We're about to emit a list of pointers to
+    // typeinfo objects at the end of the LSDA. However, unless we're in static
+    // mode, this reference will require a relocation by the dynamic linker.
+    //
+    // Because of this, we have a couple of options:
+    //
+    //   1) If we are in -static mode, we can always use an absolute reference
+    //      from the LSDA, because the static linker will resolve it.
+    //
+    //   2) Otherwise, if the LSDA section is writable, we can output the
+    //      direct reference to the typeinfo and allow the dynamic linker to
+    //      relocate it. Since it is in a writable section, the dynamic linker
+    //      won't have a problem.
+    //
+    //   3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+    //      we need to use some form of indirection. For example, on Darwin,
+    //      we can output a statically-relocatable reference to a dyld stub.
The + // offset to the stub is constant, but the contents are in a section + // that is updated by the dynamic linker. This is easy enough, but we + // need to tell the personality function of the unwinder to indirect + // through the dyld stub. + // + // FIXME: When (3) is actually implemented, we'll have to emit the stubs + // somewhere. This predicate should be moved to a shared location that is + // in target-independent code. + // + TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding(); + TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding); + } + + // Begin the exception table. + // Sometimes we want not to emit the data into separate section (e.g. ARM + // EHABI). In this case LSDASection will be NULL. + if (LSDASection) + Asm->OutStreamer.SwitchSection(LSDASection); + Asm->EmitAlignment(2); + + // Emit the LSDA. + MCSymbol *GCCETSym = + Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+ + Twine(Asm->getFunctionNumber())); + Asm->OutStreamer.EmitLabel(GCCETSym); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception", + Asm->getFunctionNumber())); + + if (IsSJLJ) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_", + Asm->getFunctionNumber())); + + // Emit the LSDA header. + Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); + Asm->EmitEncodingByte(TTypeEncoding, "@TType"); + + // The type infos need to be aligned. GCC does this by inserting padding just + // before the type infos. However, this changes the size of the exception + // table, so you need to take this into account when you output the exception + // table size. However, the size is output using a variable length encoding. + // So by increasing the size by inserting padding, you may increase the number + // of bytes used for writing the size. If it increases, say by one byte, then + // you now need to output one less byte of padding to get the type infos + // aligned. However this decreases the size of the exception table. This + // changes the value you have to output for the exception table size. Due to + // the variable length encoding, the number of bytes used for writing the + // length may decrease. If so, you then have to increase the amount of + // padding. And so on. If you look carefully at the GCC code you will see that + // it indeed does this in a loop, going on and on until the values stabilize. + // We chose another solution: don't output padding inside the table like GCC + // does, instead output it before the table. + unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; + unsigned CallSiteTableLengthSize = + MCAsmInfo::getULEB128Size(CallSiteTableLength); + unsigned TTypeBaseOffset = + sizeof(int8_t) + // Call site format + CallSiteTableLengthSize + // Call site table length size + CallSiteTableLength + // Call site table length + SizeActions + // Actions size + SizeTypes; + unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset); + unsigned TotalSize = + sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size + TTypeBaseOffset; // TType base offset + unsigned SizeAlign = (4 - TotalSize) & 3; + + if (HaveTTData) { + // Account for any extra padding that will be added to the call site table + // length. 
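+  //
+  // Worked example (hypothetical sizes): with CallSiteTableLength = 100,
+  // SizeActions = 20 and SizeTypes = 16, TTypeBaseOffset is
+  // 1 + 1 + 100 + 20 + 16 = 138, whose ULEB128 form takes two bytes; then
+  // TotalSize = 1 + 1 + 2 + 138 = 142 and SizeAlign = (4 - 142) & 3 = 2.
+  // Those two pad bytes are folded into the ULEB128 emitted just below
+  // instead of being emitted as separate padding, which is how the GCC-style
+  // fixed-point iteration described above is avoided.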
+ Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign); + SizeAlign = 0; + } + + bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + + // SjLj Exception handling + if (IsSJLJ) { + Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); + + // Add extra padding if it wasn't added to the TType base offset. + Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); + + // Emit the landing pad site information. + unsigned idx = 0; + for (SmallVectorImpl<CallSiteEntry>::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { + const CallSiteEntry &S = *I; + + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. + if (VerboseAsm) { + Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<"); + Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx)); + } + Asm->EmitULEB128(idx); + + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + if (VerboseAsm) { + if (S.Action == 0) + Asm->OutStreamer.AddComment(" Action: cleanup"); + else + Asm->OutStreamer.AddComment(" Action: " + + Twine((S.Action - 1) / 2 + 1)); + } + Asm->EmitULEB128(S.Action); + } + } else { + // DWARF Exception handling + assert(Asm->MAI->isExceptionHandlingDwarf()); + + // The call-site table is a list of all call sites that may throw an + // exception (including C++ 'throw' statements) in the procedure + // fragment. It immediately follows the LSDA header. Each entry indicates, + // for a given call, the first corresponding action record and corresponding + // landing pad. + // + // The table begins with the number of bytes, stored as an LEB128 + // compressed, unsigned integer. The records immediately follow the record + // count. They are sorted in increasing call-site address. Each record + // indicates: + // + // * The position of the call-site. + // * The position of the landing pad. + // * The first action record for that call site. + // + // A missing entry in the call-site table indicates that a call is not + // supposed to throw. + + // Emit the landing pad call site table. + Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); + + // Add extra padding if it wasn't added to the TType base offset. + Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); + + unsigned Entry = 0; + for (SmallVectorImpl<CallSiteEntry>::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { + const CallSiteEntry &S = *I; + + MCSymbol *EHFuncBeginSym = + Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); + + MCSymbol *BeginLabel = S.BeginLabel; + if (BeginLabel == 0) + BeginLabel = EHFuncBeginSym; + MCSymbol *EndLabel = S.EndLabel; + if (EndLabel == 0) + EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); + + + // Offset of the call site relative to the previous call site, counted in + // number of 16-byte bundles. The first call site is counted relative to + // the start of the procedure fragment. 
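+      //
+      // Schematically, each DWARF call-site record emitted below is (with the
+      // udata4 encodings chosen above):
+      //
+      //   .long    BeginLabel - eh_func_begin   ; call-site start
+      //   .long    EndLabel - BeginLabel        ; call-site length
+      //   .long    PadLabel - eh_func_begin     ; landing pad, or 0 if none
+      //   .uleb128 S.Action                     ; first action, biased by 1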
+ if (VerboseAsm) + Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<"); + Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); + if (VerboseAsm) + Asm->OutStreamer.AddComment(Twine(" Call between ") + + BeginLabel->getName() + " and " + + EndLabel->getName()); + Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); + + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. + if (!S.PadLabel) { + if (VerboseAsm) + Asm->OutStreamer.AddComment(" has no landing pad"); + Asm->OutStreamer.EmitIntValue(0, 4/*size*/); + } else { + if (VerboseAsm) + Asm->OutStreamer.AddComment(Twine(" jumps to ") + + S.PadLabel->getName()); + Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4); + } + + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + if (VerboseAsm) { + if (S.Action == 0) + Asm->OutStreamer.AddComment(" On action: cleanup"); + else + Asm->OutStreamer.AddComment(" On action: " + + Twine((S.Action - 1) / 2 + 1)); + } + Asm->EmitULEB128(S.Action); + } + } + + // Emit the Action Table. + int Entry = 0; + for (SmallVectorImpl<ActionEntry>::const_iterator + I = Actions.begin(), E = Actions.end(); I != E; ++I) { + const ActionEntry &Action = *I; + + if (VerboseAsm) { + // Emit comments that decode the action table. + Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<"); + } + + // Type Filter + // + // Used by the runtime to match the type of the thrown exception to the + // type of the catch clauses or the types in the exception specification. + if (VerboseAsm) { + if (Action.ValueForTypeID > 0) + Asm->OutStreamer.AddComment(" Catch TypeInfo " + + Twine(Action.ValueForTypeID)); + else if (Action.ValueForTypeID < 0) + Asm->OutStreamer.AddComment(" Filter TypeInfo " + + Twine(Action.ValueForTypeID)); + else + Asm->OutStreamer.AddComment(" Cleanup"); + } + Asm->EmitSLEB128(Action.ValueForTypeID); + + // Action Record + // + // Self-relative signed displacement in bytes of the next action record, + // or 0 if there is no next action record. + if (VerboseAsm) { + if (Action.NextAction == 0) { + Asm->OutStreamer.AddComment(" No further actions"); + } else { + unsigned NextAction = Entry + (Action.NextAction + 1) / 2; + Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction)); + } + } + Asm->EmitSLEB128(Action.NextAction); + } + + EmitTypeInfos(TTypeEncoding); + + Asm->EmitAlignment(2); +} + +void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) { + const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + + bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + + int Entry = 0; + // Emit the Catch TypeInfos. + if (VerboseAsm && !TypeInfos.empty()) { + Asm->OutStreamer.AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer.AddBlankLine(); + Entry = TypeInfos.size(); + } + + for (std::vector<const GlobalVariable *>::const_reverse_iterator + I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { + const GlobalVariable *GV = *I; + if (VerboseAsm) + Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); + Asm->EmitTTypeReference(GV, TTypeEncoding); + } + + // Emit the Exception Specifications. 
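+  // (The verbose FilterInfo comments below count downwards from -1 because
+  // filter entries are referenced from action records by negative type ids;
+  // an action record whose ValueForTypeID is negative selects a filter, and
+  // that value is usually the negative filter index itself, as described in
+  // the FilterOffsets discussion in ComputeActionsTable.)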
+ if (VerboseAsm && !FilterIds.empty()) { + Asm->OutStreamer.AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer.AddBlankLine(); + Entry = 0; + } + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { + unsigned TypeID = *I; + if (VerboseAsm) { + --Entry; + if (TypeID != 0) + Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); + } + + Asm->EmitULEB128(TypeID); + } +} + +/// EndModule - Emit all exception information that should come after the +/// content. +void DwarfException::EndModule() { + llvm_unreachable("Should be implemented"); +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void DwarfException::BeginFunction(const MachineFunction *MF) { + llvm_unreachable("Should be implemented"); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void DwarfException::EndFunction() { + llvm_unreachable("Should be implemented"); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h new file mode 100644 index 0000000..74b1b13 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -0,0 +1,234 @@ +//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include <vector> + +namespace llvm { + +template <typename T> class SmallVectorImpl; +struct LandingPadInfo; +class MachineModuleInfo; +class MachineMove; +class MachineInstr; +class MachineFunction; +class MCAsmInfo; +class MCExpr; +class MCSymbol; +class Function; +class AsmPrinter; + +//===----------------------------------------------------------------------===// +/// DwarfException - Emits Dwarf exception handling directives. +/// +class DwarfException { +protected: + /// Asm - Target of Dwarf emission. + AsmPrinter *Asm; + + /// MMI - Collected machine module information. + MachineModuleInfo *MMI; + + /// SharedTypeIds - How many leading type ids two landing pads have in common. + static unsigned SharedTypeIds(const LandingPadInfo *L, + const LandingPadInfo *R); + + /// PadLT - Order landing pads lexicographically by type id. + static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R); + + /// PadRange - Structure holding a try-range and the associated landing pad. + struct PadRange { + // The index of the landing pad. + unsigned PadIndex; + // The index of the begin and end labels in the landing pad's label lists. + unsigned RangeIndex; + }; + + typedef DenseMap<MCSymbol *, PadRange> RangeMapType; + + /// ActionEntry - Structure describing an entry in the actions table. + struct ActionEntry { + int ValueForTypeID; // The value to write - may not be equal to the type id. + int NextAction; + unsigned Previous; + }; + + /// CallSiteEntry - Structure describing an entry in the call-site table. + struct CallSiteEntry { + // The 'try-range' is BeginLabel .. 
EndLabel.
+    MCSymbol *BeginLabel; // zero indicates the start of the function.
+    MCSymbol *EndLabel;   // zero indicates the end of the function.
+
+    // The landing pad starts at PadLabel.
+    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
+    unsigned Action;
+  };
+
+  /// ComputeActionsTable - Compute the actions table and gather the first
+  /// action index for each landing pad site.
+  unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LPs,
+                               SmallVectorImpl<ActionEntry> &Actions,
+                               SmallVectorImpl<unsigned> &FirstActions);
+
+  /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+  /// marked `nounwind'. Return `false' otherwise.
+  bool CallToNoUnwindFunction(const MachineInstr *MI);
+
+  /// ComputeCallSiteTable - Compute the call-site table. The entry for an
+  /// invoke has a try-range containing the call, a non-zero landing pad and an
+  /// appropriate action. The entry for an ordinary call has a try-range
+  /// containing the call and zero for the landing pad and the action. Calls
+  /// marked 'nounwind' have no entry and must not be contained in the
+  /// try-range of any entry - they form gaps in the table. Entries must be
+  /// ordered by try-range address.
+  void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+                            const RangeMapType &PadMap,
+                            const SmallVectorImpl<const LandingPadInfo *> &LPs,
+                            const SmallVectorImpl<unsigned> &FirstActions);
+
+  /// EmitExceptionTable - Emit landing pads and actions.
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy. First there is a header which describes the location and
+  /// organization of the three components that follow.
+  ///  1. The landing pad site information describes the range of code covered
+  ///     by the try. In our case it's an accumulation of the ranges covered
+  ///     by the invokes in the try. There is also a reference to the landing
+  ///     pad that handles the exception once processed. Finally, there is an
+  ///     index into the actions table.
+  ///  2. The action table, in our case, is composed of pairs of type ids
+  ///     and next action offset. Starting with the action index from the
+  ///     landing pad site, each type id is checked for a match to the current
+  ///     exception. If it matches then the exception and type id are passed
+  ///     on to the landing pad. Otherwise the next action is looked up. This
+  ///     chain is terminated with a next action of zero. If no type id is
+  ///     found the frame is unwound and handling continues.
+  ///  3. The type id table contains references to all the C++ typeinfos for
+  ///     all catches in the function. This table is reverse indexed, base 1.
+  void EmitExceptionTable();
+
+  virtual void EmitTypeInfos(unsigned TTypeEncoding);
+
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfException(AsmPrinter *A);
+  virtual ~DwarfException();
+
+  /// EndModule - Emit all exception information that should come after the
+  /// content.
+  virtual void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information. Assumes being
+  /// emitted immediately after the function entry point.
+  virtual void BeginFunction(const MachineFunction *MF);
+
+  /// EndFunction - Gather and emit post-function exception information.
+  virtual void EndFunction();
+};
+
+class DwarfCFIException : public DwarfException {
+  /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
+  /// should be emitted.
+ bool shouldEmitPersonality; + + /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda + /// should be emitted. + bool shouldEmitLSDA; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + + AsmPrinter::CFIMoveType moveTypeModule; + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfCFIException(AsmPrinter *A); + virtual ~DwarfCFIException(); + + /// EndModule - Emit all exception information that should come after the + /// content. + virtual void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + virtual void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + virtual void EndFunction(); +}; + +class ARMException : public DwarfException { + void EmitTypeInfos(unsigned TTypeEncoding); +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + ARMException(AsmPrinter *A); + virtual ~ARMException(); + + /// EndModule - Emit all exception information that should come after the + /// content. + virtual void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + virtual void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + virtual void EndFunction(); +}; + +class Win64Exception : public DwarfException { + /// shouldEmitPersonality - Per-function flag to indicate if personality + /// info should be emitted. + bool shouldEmitPersonality; + + /// shouldEmitLSDA - Per-function flag to indicate if the LSDA + /// should be emitted. + bool shouldEmitLSDA; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + Win64Exception(AsmPrinter *A); + virtual ~Win64Exception(); + + /// EndModule - Emit all exception information that should come after the + /// content. + virtual void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + virtual void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + virtual void EndFunction(); +}; + +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp new file mode 100644 index 0000000..a8fb66d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -0,0 +1,120 @@ +//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the compiler plugin that is used in order to emit +// garbage collection information in a convenient layout for parsing and +// loading in the Erlang/OTP runtime. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + + class ErlangGCPrinter : public GCMetadataPrinter { + public: + void beginAssembly(AsmPrinter &AP); + void finishAssembly(AsmPrinter &AP); + }; + +} + +static GCMetadataPrinterRegistry::Add<ErlangGCPrinter> +X("erlang", "erlang-compatible garbage collector"); + +void llvm::linkErlangGCPrinter() { } + +void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { } + +void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) { + MCStreamer &OS = AP.OutStreamer; + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); + + // Put this in a custom .note section. + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext() + .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0, + SectionKind::getDataRel())); + + // For each function... + for (iterator FI = begin(), FE = end(); FI != FE; ++FI) { + GCFunctionInfo &MD = **FI; + + /** A compact GC layout. Emit this data structure: + * + * struct { + * int16_t PointCount; + * void *SafePointAddress[PointCount]; + * int16_t StackFrameSize; (in words) + * int16_t StackArity; + * int16_t LiveCount; + * int16_t LiveOffsets[LiveCount]; + * } __gcmap_<FUNCTIONNAME>; + **/ + + // Align to address width. + AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + + // Emit PointCount. + OS.AddComment("safe point count"); + AP.EmitInt16(MD.size()); + + // And each safe point... + for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE; + ++PI) { + // Emit the address of the safe point. + OS.AddComment("safe point address"); + MCSymbol *Label = PI->Label; + AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/); + } + + // Stack information never change in safe points! Only print info from the + // first call-site. + GCFunctionInfo::iterator PI = MD.begin(); + + // Emit the stack frame size. + OS.AddComment("stack frame size (in words)"); + AP.EmitInt16(MD.getFrameSize() / IntPtrSize); + + // Emit stack arity, i.e. the number of stacked arguments. + unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; + unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ? + MD.getFunction().arg_size() - RegisteredArgs : 0; + OS.AddComment("stack arity"); + AP.EmitInt16(StackArity); + + // Emit the number of live roots in the function. + OS.AddComment("live root count"); + AP.EmitInt16(MD.live_size(PI)); + + // And for each live root... + for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), + LE = MD.live_end(PI); + LI != LE; ++LI) { + // Emit live root's offset within the stack frame. 
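+        // (A root recorded at StackOffset 16 with 8-byte words is thus
+        // emitted as slot index 2; a hypothetical runtime-side reader would
+        // multiply the index by the word size to recover the frame offset.)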
+ OS.AddComment("stack index (offset / wordsize)"); + AP.EmitInt16(LI->StackOffset / IntPtrSize); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp new file mode 100644 index 0000000..98177c0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -0,0 +1,166 @@ +//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements printing the assembly code for an Ocaml frametable. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include <cctype> +using namespace llvm; + +namespace { + + class OcamlGCMetadataPrinter : public GCMetadataPrinter { + public: + void beginAssembly(AsmPrinter &AP); + void finishAssembly(AsmPrinter &AP); + }; + +} + +static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter> +Y("ocaml", "ocaml 3.10-compatible collector"); + +void llvm::linkOcamlGCPrinter() { } + +static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { + const std::string &MId = M.getModuleIdentifier(); + + std::string SymName; + SymName += "caml"; + size_t Letter = SymName.size(); + SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.')); + SymName += "__"; + SymName += Id; + + // Capitalize the first letter of the module name. + SymName[Letter] = toupper(SymName[Letter]); + + SmallString<128> TmpStr; + AP.Mang->getNameWithPrefix(TmpStr, SymName); + + MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr); + + AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); + AP.OutStreamer.EmitLabel(Sym); +} + +void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) { + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(getModule(), AP, "code_begin"); + + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), AP, "data_begin"); +} + +/// emitAssembly - Print the frametable. The ocaml frametable format is thus: +/// +/// extern "C" struct align(sizeof(intptr_t)) { +/// uint16_t NumDescriptors; +/// struct align(sizeof(intptr_t)) { +/// void *ReturnAddress; +/// uint16_t FrameSize; +/// uint16_t NumLiveOffsets; +/// uint16_t LiveOffsets[NumLiveOffsets]; +/// } Descriptors[NumDescriptors]; +/// } caml${module}__frametable; +/// +/// Note that this precludes programs from stack frames larger than 64K +/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if +/// either condition is detected in a function which uses the GC. 
+/// +void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); + + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(getModule(), AP, "code_end"); + + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), AP, "data_end"); + + // FIXME: Why does ocaml emit this?? + AP.OutStreamer.EmitIntValue(0, IntPtrSize); + + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), AP, "frametable"); + + int NumDescriptors = 0; + for (iterator I = begin(), IE = end(); I != IE; ++I) { + GCFunctionInfo &FI = **I; + for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { + NumDescriptors++; + } + } + + if (NumDescriptors >= 1<<16) { + // Very rude! + report_fatal_error(" Too much descriptor for ocaml GC"); + } + AP.EmitInt16(NumDescriptors); + AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + + for (iterator I = begin(), IE = end(); I != IE; ++I) { + GCFunctionInfo &FI = **I; + + uint64_t FrameSize = FI.getFrameSize(); + if (FrameSize >= 1<<16) { + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! " + "Frame size " + Twine(FrameSize) + ">= 65536.\n" + "(" + Twine(uintptr_t(&FI)) + ")"); + } + + AP.OutStreamer.AddComment("live roots for " + + Twine(FI.getFunction().getName())); + AP.OutStreamer.AddBlankLine(); + + for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { + size_t LiveCount = FI.live_size(J); + if (LiveCount >= 1<<16) { + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! " + "Live root count "+Twine(LiveCount)+" >= 65536."); + } + + AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize); + AP.EmitInt16(FrameSize); + AP.EmitInt16(LiveCount); + + for (GCFunctionInfo::live_iterator K = FI.live_begin(J), + KE = FI.live_end(J); K != KE; ++K) { + if (K->StackOffset >= 1<<16) { + // Very rude! + report_fatal_error( + "GC root stack offset is outside of fixed stack frame and out " + "of range for ocaml GC!"); + } + AP.EmitInt16(K->StackOffset); + } + + AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp new file mode 100644 index 0000000..1561012 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -0,0 +1,114 @@ +//===-- CodeGen/AsmPrinter/Win64Exception.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Win64 exception info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +Win64Exception::Win64Exception(AsmPrinter *A) + : DwarfException(A), + shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false) + {} + +Win64Exception::~Win64Exception() {} + +/// EndModule - Emit all exception information that should come after the +/// content. +void Win64Exception::EndModule() { +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void Win64Exception::BeginFunction(const MachineFunction *MF) { + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + + // If any landing pads survive, we need an EH table. + bool hasLandingPads = !MMI->getLandingPads().empty(); + + shouldEmitMoves = Asm->needsSEHMoves(); + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + + shouldEmitPersonality = hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per; + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; + + if (!shouldEmitPersonality && !shouldEmitMoves) + return; + + Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym); + + if (!shouldEmitPersonality) + return; + + MCSymbol *GCCHandlerSym = + Asm->GetExternalSymbolSymbol("_GCC_specific_handler"); + Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void Win64Exception::EndFunction() { + if (!shouldEmitPersonality && !shouldEmitMoves) + return; + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Map all labels and get rid of any dead landing pads. 
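+  // Schematically, the language-specific data emitted below into the
+  // handler-data area is:
+  //
+  //   .long <personality function>   ; consumed by _GCC_specific_handler
+  //   <GCC-style LSDA>               ; EmitExceptionTable() output
+  //
+  // i.e. the Win64 unwinder dispatches to _GCC_specific_handler (registered
+  // in BeginFunction above), which in turn interprets the DWARF-style
+  // exception table that follows.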
+ MMI->TidyLandingPads(); + + if (shouldEmitPersonality) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + + Asm->OutStreamer.PushSection(); + Asm->OutStreamer.EmitWin64EHHandlerData(); + Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext), + 4); + EmitExceptionTable(); + Asm->OutStreamer.PopSection(); + } + Asm->OutStreamer.EmitWin64EHEndProc(); +} diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp new file mode 100644 index 0000000..012ff8a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -0,0 +1,466 @@ +//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the implementation of a basic TargetTransformInfo pass +/// predicated on the target abstractions present in the target independent +/// code generator. It uses these (primarily TargetLowering) to model as much +/// of the TTI query interface as possible. It is included by most targets so +/// that they can specialize only a small subset of the query space. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "basictti" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Target/TargetLowering.h" +#include <utility> + +using namespace llvm; + +namespace { + +class BasicTTI : public ImmutablePass, public TargetTransformInfo { + const TargetLoweringBase *TLI; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + BasicTTI() : ImmutablePass(ID), TLI(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) { + initializeBasicTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. 
+ virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + + virtual bool isLegalAddImmediate(int64_t imm) const; + virtual bool isLegalICmpImmediate(int64_t imm) const; + virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const; + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; + virtual bool isTypeLegal(Type *Ty) const; + virtual unsigned getJumpBufAlignment() const; + virtual unsigned getJumpBufSize() const; + virtual bool shouldBuildLookupTables() const; + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getMaximumUnrollFactor() const; + virtual unsigned getRegisterBitWidth(bool Vector) const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind, + OperandValueKind) const; + virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const; + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + virtual unsigned getCFInstrCost(unsigned Opcode) const; + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; + virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, + ArrayRef<Type*> Tys) const; + virtual unsigned getNumberOfParts(Type *Tp) const; + virtual unsigned getAddressComputationCost(Type *Ty) const; + + /// @} +}; + +} + +INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", + "Target independent code generator's TTI", true, true, false) +char BasicTTI::ID = 0; + +ImmutablePass * +llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) { + return new BasicTTI(TLI); +} + + +bool BasicTTI::isLegalAddImmediate(int64_t imm) const { + return TLI->isLegalAddImmediate(imm); +} + +bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { + return TLI->isLegalICmpImmediate(imm); +} + +bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const { + TargetLoweringBase::AddrMode AM; + AM.BaseGV = BaseGV; + AM.BaseOffs = BaseOffset; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Scale; + return TLI->isLegalAddressingMode(AM, Ty); +} + +bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { + return TLI->isTruncateFree(Ty1, Ty2); +} + +bool BasicTTI::isTypeLegal(Type *Ty) const { + EVT T = TLI->getValueType(Ty); + return TLI->isTypeLegal(T); +} + +unsigned BasicTTI::getJumpBufAlignment() const { + return TLI->getJumpBufAlignment(); +} + +unsigned BasicTTI::getJumpBufSize() const { + return TLI->getJumpBufSize(); +} + +bool BasicTTI::shouldBuildLookupTables() const { + return TLI->supportJumpTables() && + (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); +} + +//===----------------------------------------------------------------------===// +// +// Calls used by the vectorizers. 
+//
+//===----------------------------------------------------------------------===//
+
+unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert,
+                                            bool Extract) const {
+  assert(Ty->isVectorTy() && "Can only scalarize vectors");
+  unsigned Cost = 0;
+
+  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+    if (Insert)
+      Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+    if (Extract)
+      Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+  }
+
+  return Cost;
+}
+
+unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
+  return 1;
+}
+
+unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
+  return 32;
+}
+
+unsigned BasicTTI::getMaximumUnrollFactor() const {
+  return 1;
+}
+
+unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                          OperandValueKind,
+                                          OperandValueKind) const {
+  // Check if any of the operands are vector operands.
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+    // The operation is legal. Assume it costs 1.
+    // If the type is split to multiple registers, assume that there is some
+    // overhead to this.
+    // TODO: Once we have extract/insert subvector cost we need to use them.
+    if (LT.first > 1)
+      return LT.first * 2;
+    return LT.first * 1;
+  }
+
+  if (!TLI->isOperationExpand(ISD, LT.second)) {
+    // If the operation is custom lowered then assume
+    // that the code is twice as expensive.
+    return LT.first * 2;
+  }
+
+  // Else, assume that we need to scalarize this op.
+  if (Ty->isVectorTy()) {
+    unsigned Num = Ty->getVectorNumElements();
+    unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType());
+    // Return the cost of multiple scalar invocations plus the cost of
+    // inserting and extracting the values.
+    return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+  }
+
+  // We don't know anything about this scalar instruction.
+  return 1;
+}
+
+unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+                                  Type *SubTp) const {
+  return 1;
+}
+
+unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+                                    Type *Src) const {
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
+  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);
+
+  // Check for NOOP conversions.
+  if (SrcLT.first == DstLT.first &&
+      SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+
+    // Bitcasts between types that are legalized to the same type are free.
+    if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
+      return 0;
+  }
+
+  if (Opcode == Instruction::Trunc &&
+      TLI->isTruncateFree(SrcLT.second, DstLT.second))
+    return 0;
+
+  if (Opcode == Instruction::ZExt &&
+      TLI->isZExtFree(SrcLT.second, DstLT.second))
+    return 0;
+
+  // If the cast is marked as legal (or promote) then assume low cost.
+  if (TLI->isOperationLegalOrPromote(ISD, DstLT.second))
+    return 1;
+
+  // Handle scalar conversions.
+  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
+
+    // Scalar bitcasts are usually free.
+    if (Opcode == Instruction::BitCast)
+      return 0;
+
+    // Just check the op cost. If the operation is legal then assume it costs 1.
+    if (!TLI->isOperationExpand(ISD, DstLT.second))
+      return 1;
+
+    // Assume that illegal scalar instructions are expensive.
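+    // (Illustrative numbers only: a scalar conversion that must be expanded,
+    // e.g. on a hypothetical target with no native fptoui, is charged 4 here,
+    // versus 1 for a legal or custom-lowered operation above and 0 for the
+    // free cases.)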
+ return 4; + } + + // Check vector-to-vector casts. + if (Dst->isVectorTy() && Src->isVectorTy()) { + + // If the cast is between same-sized registers, then the check is simple. + if (SrcLT.first == DstLT.first && + SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { + + // Assume that Zext is done using AND. + if (Opcode == Instruction::ZExt) + return 1; + + // Assume that sext is done using SHL and SRA. + if (Opcode == Instruction::SExt) + return 2; + + // Just check the op cost. If the operation is legal then assume it costs + // 1 and multiply by the type-legalization overhead. + if (!TLI->isOperationExpand(ISD, DstLT.second)) + return SrcLT.first * 1; + } + + // If we are converting vectors and the operation is illegal, or + // if the vectors are legalized to different types, estimate the + // scalarization costs. + unsigned Num = Dst->getVectorNumElements(); + unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(), + Src->getScalarType()); + + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. + return getScalarizationOverhead(Dst, true, true) + Num * Cost; + } + + // We already handled vector-to-vector and scalar-to-scalar conversions. This + // is where we handle bitcast between vectors and scalars. We need to assume + // that the conversion is scalarized in one way or another. + if (Opcode == Instruction::BitCast) + // Illegal bitcasts are done by storing and loading from a stack slot. + return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) + + (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0); + + llvm_unreachable("Unhandled cast"); + } + +unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { + // Branches are assumed to be predicted. + return 0; +} + +unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Selects on vectors are actually vector selects. + if (ISD == ISD::SELECT) { + assert(CondTy && "CondTy must exist"); + if (CondTy->isVectorTy()) + ISD = ISD::VSELECT; + } + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. Multiply + // by the type-legalization overhead. + return LT.first * 1; + } + + // Otherwise, assume that the cast is scalarized. + if (ValTy->isVectorTy()) { + unsigned Num = ValTy->getVectorNumElements(); + if (CondTy) + CondTy = CondTy->getScalarType(); + unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(), + CondTy); + + // Return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(ValTy, true, false) + Num * Cost; + } + + // Unknown scalar opcode. + return 1; +} + +unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + return 1; +} + +unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + assert(!Src->isVoidTy() && "Invalid type"); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + + // Assume that all loads of legal types cost 1. + return LT.first; +} + +unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<Type *> Tys) const { + unsigned ISD = 0; + switch (IID) { + default: { + // Assume that we need to scalarize this intrinsic. 
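+    // For example (hypothetical target): an unrecognized intrinsic returning
+    // <4 x float> is modeled below as 4 scalar calls plus the overhead of
+    // inserting the 4 scalar results back into a vector.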
+    unsigned ScalarizationCost = 0;
+    unsigned ScalarCalls = 1;
+    if (RetTy->isVectorTy()) {
+      ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
+      ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+    }
+    for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+      if (Tys[i]->isVectorTy()) {
+        ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+        // Scale the calls by the widest vector argument, not the return type.
+        ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
+      }
+    }
+
+    return ScalarCalls + ScalarizationCost;
+  }
+  // Look for intrinsics that can be lowered directly or turned into a scalar
+  // intrinsic call.
+  case Intrinsic::sqrt:    ISD = ISD::FSQRT;  break;
+  case Intrinsic::sin:     ISD = ISD::FSIN;   break;
+  case Intrinsic::cos:     ISD = ISD::FCOS;   break;
+  case Intrinsic::exp:     ISD = ISD::FEXP;   break;
+  case Intrinsic::exp2:    ISD = ISD::FEXP2;  break;
+  case Intrinsic::log:     ISD = ISD::FLOG;   break;
+  case Intrinsic::log10:   ISD = ISD::FLOG10; break;
+  case Intrinsic::log2:    ISD = ISD::FLOG2;  break;
+  case Intrinsic::fabs:    ISD = ISD::FABS;   break;
+  case Intrinsic::floor:   ISD = ISD::FFLOOR; break;
+  case Intrinsic::ceil:    ISD = ISD::FCEIL;  break;
+  case Intrinsic::trunc:   ISD = ISD::FTRUNC; break;
+  case Intrinsic::rint:    ISD = ISD::FRINT;  break;
+  case Intrinsic::pow:     ISD = ISD::FPOW;   break;
+  case Intrinsic::fma:     ISD = ISD::FMA;    break;
+  case Intrinsic::fmuladd: ISD = ISD::FMA;    break; // FIXME: mul + add?
+  }
+
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
+
+  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+    // The operation is legal. Assume it costs 1.
+    // If the type is split to multiple registers, assume that there is some
+    // overhead to this.
+    // TODO: Once we have extract/insert subvector cost we need to use them.
+    if (LT.first > 1)
+      return LT.first * 2;
+    return LT.first * 1;
+  }
+
+  if (!TLI->isOperationExpand(ISD, LT.second)) {
+    // If the operation is custom lowered then assume
+    // that the code is twice as expensive.
+    return LT.first * 2;
+  }
+
+  // Else, assume that we need to scalarize this intrinsic. For math builtins
+  // this will emit a costly libcall, adding call overhead and spills. Make it
+  // very expensive.
+  if (RetTy->isVectorTy()) {
+    unsigned Num = RetTy->getVectorNumElements();
+    unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
+                                                  Tys);
+    return 10 * Cost * Num;
+  }
+
+  // This is going to be turned into a library call, make it expensive.
+  return 10;
+}
+
+unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+  return LT.first;
+}
+
+unsigned BasicTTI::getAddressComputationCost(Type *Ty) const {
+  return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 0000000..f8cc3b3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1725 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBB's, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation; it cannot handle
+// SSA form.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "branchfolding" +#include "BranchFolding.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); +STATISTIC(NumBranchOpts, "Number of branches optimized"); +STATISTIC(NumTailMerge , "Number of block tails merged"); +STATISTIC(NumHoist , "Number of times common instructions are hoisted"); + +static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", + cl::init(cl::BOU_UNSET), cl::Hidden); + +// Throttle for huge numbers of predecessors (compile speed problems) +static cl::opt<unsigned> +TailMergeThreshold("tail-merge-threshold", + cl::desc("Max number of predecessors to consider tail merging"), + cl::init(150), cl::Hidden); + +// Heuristic for tail merging (and, inversely, tail duplication). +// TODO: This should be replaced with a target query. +static cl::opt<unsigned> +TailMergeSize("tail-merge-size", + cl::desc("Min number of instructions to consider tail merging"), + cl::init(3), cl::Hidden); + +namespace { + /// BranchFolderPass - Wrap branch folder in a machine function pass. + class BranchFolderPass : public MachineFunctionPass { + public: + static char ID; + explicit BranchFolderPass(): MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char BranchFolderPass::ID = 0; +char &llvm::BranchFolderPassID = BranchFolderPass::ID; + +INITIALIZE_PASS(BranchFolderPass, "branch-folder", + "Control Flow Optimizer", false, false) + +bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true); + return Folder.OptimizeFunction(MF, + MF.getTarget().getInstrInfo(), + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); +} + + +BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) { + switch (FlagEnableTailMerge) { + case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; + case cl::BOU_TRUE: EnableTailMerge = true; break; + case cl::BOU_FALSE: EnableTailMerge = false; break; + } + + EnableHoistCommonCode = CommonHoist; +} + +/// RemoveDeadBlock - Remove the specified dead machine basic block from the +/// function, updating the CFG. +void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { + assert(MBB->pred_empty() && "MBB must be dead!"); + DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); + + MachineFunction *MF = MBB->getParent(); + // drop all successors. 
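+  // (Each removeSuccessor call below also removes MBB from that successor's
+  // predecessor list, keeping the CFG consistent before the block itself is
+  // erased.)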
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_end()-1);
+
+  // Avoid matching if this pointer gets reused.
+  TriedMerging.erase(MBB);
+
+  // Remove the block.
+  MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+/// r0 = implicit_def
+/// r1 = implicit_def
+/// br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+  SmallSet<unsigned, 4> ImpDefRegs;
+  MachineBasicBlock::iterator I = MBB->begin();
+  while (I != MBB->end()) {
+    if (!I->isImplicitDef())
+      break;
+    unsigned Reg = I->getOperand(0).getReg();
+    ImpDefRegs.insert(Reg);
+    for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+      ImpDefRegs.insert(*SubRegs);
+    ++I;
+  }
+  if (ImpDefRegs.empty())
+    return false;
+
+  MachineBasicBlock::iterator FirstTerm = I;
+  while (I != MBB->end()) {
+    if (!TII->isUnpredicatedTerminator(I))
+      return false;
+    // See if it uses any of the implicitly defined registers.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = I->getOperand(i);
+      if (!MO.isReg() || !MO.isUse())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (ImpDefRegs.count(Reg))
+        return false;
+    }
+    ++I;
+  }
+
+  I = MBB->begin();
+  while (I != FirstTerm) {
+    MachineInstr *ImpDefMI = &*I;
+    ++I;
+    MBB->erase(ImpDefMI);
+  }
+
+  return true;
+}
+
+/// OptimizeFunction - Perform branch folding, tail merging and other
+/// CFG optimizations on the given function.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+                                    const TargetInstrInfo *tii,
+                                    const TargetRegisterInfo *tri,
+                                    MachineModuleInfo *mmi) {
+  if (!tii) return false;
+
+  TriedMerging.clear();
+
+  TII = tii;
+  TRI = tri;
+  MMI = mmi;
+  RS = NULL;
+
+  // Use a RegScavenger to help update liveness when required.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
+    RS = new RegScavenger();
+  else
+    MRI.invalidateLiveness();
+
+  // Fix CFG. The later algorithms expect it to be right.
+  bool MadeChange = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+    MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+    SmallVector<MachineOperand, 4> Cond;
+    if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+      MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+    MadeChange |= OptimizeImpDefsBlock(MBB);
+  }
+
+  bool MadeChangeThisIteration = true;
+  while (MadeChangeThisIteration) {
+    MadeChangeThisIteration = TailMergeBlocks(MF);
+    MadeChangeThisIteration |= OptimizeBranches(MF);
+    if (EnableHoistCommonCode)
+      MadeChangeThisIteration |= HoistCommonCode(MF);
+    MadeChange |= MadeChangeThisIteration;
+  }
+
+  // See if any jump tables have become dead as the code generator
+  // did its thing.
+  MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+  if (JTI == 0) {
+    delete RS;
+    return MadeChange;
+  }
+
+  // Walk the function to find jump tables that are live.
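+  // A jump table is live iff some instruction still carries a jump-table
+  // index operand referring to it; every other table is removed below.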
+ BitVector JTIsLive(JTI->getJumpTables().size()); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); + BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + MachineOperand &Op = I->getOperand(op); + if (!Op.isJTI()) continue; + + // Remember that this JT is live. + JTIsLive.set(Op.getIndex()); + } + } + + // Finally, remove dead jump tables. This happens when the + // indirect jump was unreachable (and thus deleted). + for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) + if (!JTIsLive.test(i)) { + JTI->RemoveJumpTable(i); + MadeChange = true; + } + + delete RS; + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Tail Merging of Blocks +//===----------------------------------------------------------------------===// + +/// HashMachineInstr - Compute a hash value for MI and its operands. +static unsigned HashMachineInstr(const MachineInstr *MI) { + unsigned Hash = MI->getOpcode(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &Op = MI->getOperand(i); + + // Merge in bits from the operand if easy. + unsigned OperandHash = 0; + switch (Op.getType()) { + case MachineOperand::MO_Register: OperandHash = Op.getReg(); break; + case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break; + case MachineOperand::MO_MachineBasicBlock: + OperandHash = Op.getMBB()->getNumber(); + break; + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + OperandHash = Op.getIndex(); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + // Global address / external symbol are too hard, don't bother, but do + // pull in the offset. + OperandHash = Op.getOffset(); + break; + default: break; + } + + Hash += ((OperandHash << 3) | Op.getType()) << (i&31); + } + return Hash; +} + +/// HashEndOfMBB - Hash the last instruction in the MBB. +static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) { + MachineBasicBlock::const_iterator I = MBB->end(); + if (I == MBB->begin()) + return 0; // Empty MBB. + + --I; + // Skip debug info so it will not affect codegen. + while (I->isDebugValue()) { + if (I==MBB->begin()) + return 0; // MBB empty except for debug info. + --I; + } + + return HashMachineInstr(I); +} + +/// ComputeCommonTailLength - Given two machine basic blocks, compute the number +/// of instructions they actually have in common together at their end. Return +/// iterators for the first shared instruction in each block. +static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2) { + I1 = MBB1->end(); + I2 = MBB2->end(); + + unsigned TailLen = 0; + while (I1 != MBB1->begin() && I2 != MBB2->begin()) { + --I1; --I2; + // Skip debugging pseudos; necessary to avoid changing the code. 
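+    // The nested loops below walk I1 and I2 backwards past DBG_VALUEs so that
+    // the presence or absence of debug info never changes which instructions
+    // are compared.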
+ while (I1->isDebugValue()) { + if (I1==MBB1->begin()) { + while (I2->isDebugValue()) { + if (I2==MBB2->begin()) + // I1==DBG at begin; I2==DBG at begin + return TailLen; + --I2; + } + ++I2; + // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin + return TailLen; + } + --I1; + } + // I1==first (untested) non-DBG preceding known match + while (I2->isDebugValue()) { + if (I2==MBB2->begin()) { + ++I1; + // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin + return TailLen; + } + --I2; + } + // I1, I2==first (untested) non-DBGs preceding known match + if (!I1->isIdenticalTo(I2) || + // FIXME: This check is dubious. It's used to get around a problem where + // people incorrectly expect inline asm directives to remain in the same + // relative order. This is untenable because normal compiler + // optimizations (like this one) may reorder and/or merge these + // directives. + I1->isInlineAsm()) { + ++I1; ++I2; + break; + } + ++TailLen; + } + // Back past possible debugging pseudos at beginning of block. This matters + // when one block differs from the other only by whether debugging pseudos + // are present at the beginning. (This way, the various checks later for + // I1==MBB1->begin() work as expected.) + if (I1 == MBB1->begin() && I2 != MBB2->begin()) { + --I2; + while (I2->isDebugValue()) { + if (I2 == MBB2->begin()) + return TailLen; + --I2; + } + ++I2; + } + if (I2 == MBB2->begin() && I1 != MBB1->begin()) { + --I1; + while (I1->isDebugValue()) { + if (I1 == MBB1->begin()) + return TailLen; + --I1; + } + ++I1; + } + return TailLen; +} + +void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, + MachineBasicBlock *NewMBB) { + if (RS) { + RS->enterBasicBlock(CurMBB); + if (!CurMBB->empty()) + RS->forward(prior(CurMBB->end())); + BitVector RegsLiveAtExit(TRI->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) + if (RegsLiveAtExit[i]) + NewMBB->addLiveIn(i); + } +} + +/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything +/// after it, replacing it with an unconditional branch to NewDest. +void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest) { + MachineBasicBlock *CurMBB = OldInst->getParent(); + + TII->ReplaceTailWithBranchTo(OldInst, NewDest); + + // For targets that use the register scavenger, we must maintain LiveIns. + MaintainLiveIns(CurMBB, NewDest); + + ++NumTailMerge; +} + +/// SplitMBBAt - Given a machine basic block and an iterator into it, split the +/// MBB so that the part before the iterator falls into the part starting at the +/// iterator. This returns the new MBB. +MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1) { + if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) + return 0; + + MachineFunction &MF = *CurMBB.getParent(); + + // Create the fall-through block. + MachineFunction::iterator MBBI = &CurMBB; + MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock()); + CurMBB.getParent()->insert(++MBBI, NewMBB); + + // Move all the successors of this block to the specified block. + NewMBB->transferSuccessors(&CurMBB); + + // Add an edge from CurMBB to NewMBB for the fall-through. + CurMBB.addSuccessor(NewMBB); + + // Splice the code over. + NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); + + // For targets that use the register scavenger, we must maintain LiveIns. 
+ MaintainLiveIns(&CurMBB, NewMBB); + + return NewMBB; +} + +/// EstimateRuntime - Make a rough estimate for how long it will take to run +/// the specified code. +static unsigned EstimateRuntime(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + unsigned Time = 0; + for (; I != E; ++I) { + if (I->isDebugValue()) + continue; + if (I->isCall()) + Time += 10; + else if (I->mayLoad() || I->mayStore()) + Time += 2; + else + ++Time; + } + return Time; +} + +// CurMBB needs to add an unconditional branch to SuccMBB (we removed these +// branches temporarily for tail merging). In the case where CurMBB ends +// with a conditional branch to the next block, optimize by reversing the +// test and conditionally branching to SuccMBB instead. +static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, + const TargetInstrInfo *TII) { + MachineFunction *MF = CurMBB->getParent(); + MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + DebugLoc dl; // FIXME: this is nowhere + if (I != MF->end() && + !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { + MachineBasicBlock *NextBB = I; + if (TBB == NextBB && !Cond.empty() && !FBB) { + if (!TII->ReverseBranchCondition(Cond)) { + TII->RemoveBranch(*CurMBB); + TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl); + return; + } + } + } + TII->InsertBranch(*CurMBB, SuccBB, NULL, + SmallVector<MachineOperand, 0>(), dl); +} + +bool +BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { + if (getHash() < o.getHash()) + return true; + if (getHash() > o.getHash()) + return false; + if (getBlock()->getNumber() < o.getBlock()->getNumber()) + return true; + if (getBlock()->getNumber() > o.getBlock()->getNumber()) + return false; + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. +#ifndef _GLIBCXX_DEBUG + llvm_unreachable("Predecessor appears twice"); +#else + return false; +#endif +} + +/// CountTerminators - Count the number of terminators in the given +/// block and set I to the position of the first non-terminator, if there +/// is one, or MBB->end() otherwise. +static unsigned CountTerminators(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &I) { + I = MBB->end(); + unsigned NumTerms = 0; + for (;;) { + if (I == MBB->begin()) { + I = MBB->end(); + break; + } + --I; + if (!I->isTerminator()) break; + ++NumTerms; + } + return NumTerms; +} + +/// ProfitableToMerge - Check if two machine basic blocks have a common tail +/// and decide if it would be profitable to merge those tails. Return the +/// length of the common tail and iterators to the first common instruction +/// in each block. +static bool ProfitableToMerge(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2, + unsigned minCommonTailLength, + unsigned &CommonTailLen, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); + if (CommonTailLen == 0) + return false; + DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber() + << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen + << '\n'); + + // It's almost always profitable to merge any number of non-terminator + // instructions with the block that falls through into the common successor. 
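+  // (The fall-through block needs no new branch after the merge, so any tail
+  // longer than the shared terminator sequence is a net win.)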
+ if (MBB1 == PredBB || MBB2 == PredBB) { + MachineBasicBlock::iterator I; + unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I); + if (CommonTailLen > NumTerms) + return true; + } + + // If one of the blocks can be completely merged and happens to be in + // a position where the other could fall through into it, merge any number + // of instructions, because it can be done without a branch. + // TODO: If the blocks are not adjacent, move one of them so that they are? + if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin()) + return true; + if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) + return true; + + // If both blocks have an unconditional branch temporarily stripped out, + // count that as an additional common instruction for the following + // heuristics. + unsigned EffectiveTailLen = CommonTailLen; + if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && + !MBB1->back().isBarrier() && + !MBB2->back().isBarrier()) + ++EffectiveTailLen; + + // Check if the common tail is long enough to be worthwhile. + if (EffectiveTailLen >= minCommonTailLength) + return true; + + // If we are optimizing for code size, 2 instructions in common is enough if + // we don't have to split a block. At worst we will be introducing 1 new + // branch instruction, which is likely to be smaller than the 2 + // instructions that would be deleted in the merge. + MachineFunction *MF = MBB1->getParent(); + if (EffectiveTailLen >= 2 && + MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && + (I1 == MBB1->begin() || I2 == MBB2->begin())) + return true; + + return false; +} + +/// ComputeSameTails - Look through all the blocks in MergePotentials that have +/// hash CurHash (guaranteed to match the last element). Build the vector +/// SameTails of all those that have the (same) largest number of instructions +/// in common of any pair of these blocks. SameTails entries contain an +/// iterator into MergePotentials (from which the MachineBasicBlock can be +/// found) and a MachineBasicBlock::iterator into that MBB indicating the +/// instruction where the matching code sequence begins. +/// Order of elements in SameTails is the reverse of the order in which +/// those blocks appear in MergePotentials (where they are not necessarily +/// consecutive). 
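+/// E.g. if blocks B1..B4 all hash to CurHash but only B1 and B3 share a
+/// profitable 5-instruction tail (B2 and B4 share just 2), SameTails ends up
+/// holding exactly B1 and B3.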
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash, + unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + unsigned maxCommonTailLength = 0U; + SameTails.clear(); + MachineBasicBlock::iterator TrialBBI1, TrialBBI2; + MPIterator HighestMPIter = prior(MergePotentials.end()); + for (MPIterator CurMPIter = prior(MergePotentials.end()), + B = MergePotentials.begin(); + CurMPIter != B && CurMPIter->getHash() == CurHash; + --CurMPIter) { + for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) { + unsigned CommonTailLen; + if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), + minCommonTailLength, + CommonTailLen, TrialBBI1, TrialBBI2, + SuccBB, PredBB)) { + if (CommonTailLen > maxCommonTailLength) { + SameTails.clear(); + maxCommonTailLength = CommonTailLen; + HighestMPIter = CurMPIter; + SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1)); + } + if (HighestMPIter == CurMPIter && + CommonTailLen == maxCommonTailLength) + SameTails.push_back(SameTailElt(I, TrialBBI2)); + } + if (I == B) + break; + } + } + return maxCommonTailLength; +} + +/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from +/// MergePotentials, restoring branches at ends of blocks as appropriate. +void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + MPIterator CurMPIter, B; + for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); + CurMPIter->getHash() == CurHash; + --CurMPIter) { + // Put the unconditional branch back, if we need one. + MachineBasicBlock *CurMBB = CurMPIter->getBlock(); + if (SuccBB && CurMBB != PredBB) + FixTail(CurMBB, SuccBB, TII); + if (CurMPIter == B) + break; + } + if (CurMPIter->getHash() != CurHash) + CurMPIter++; + MergePotentials.erase(CurMPIter, MergePotentials.end()); +} + +/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist +/// only of the common tail. Create a block that does by splitting one. +bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex) { + commonTailIndex = 0; + unsigned TimeEstimate = ~0U; + for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { + // Use PredBB if possible; that doesn't require a new branch. + if (SameTails[i].getBlock() == PredBB) { + commonTailIndex = i; + break; + } + // Otherwise, make a (fairly bogus) choice based on estimate of + // how long it will take the various blocks to execute. + unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(), + SameTails[i].getTailStartPos()); + if (t <= TimeEstimate) { + TimeEstimate = t; + commonTailIndex = i; + } + } + + MachineBasicBlock::iterator BBI = + SameTails[commonTailIndex].getTailStartPos(); + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + + // If the common tail includes any debug info we will take it pretty + // randomly from one of the inputs. Might be better to remove it? + DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " + << maxCommonTailLength); + + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + if (!newMBB) { + DEBUG(dbgs() << "... failed!"); + return false; + } + + SameTails[commonTailIndex].setBlock(newMBB); + SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); + + // If we split PredBB, newMBB is the new predecessor. 
+ if (PredBB == MBB) + PredBB = newMBB; + + return true; +} + +// See if any of the blocks in MergePotentials (which all have a common single +// successor, or all have no successor) can be tail-merged. If there is a +// successor, any blocks in MergePotentials that are not tail-merged and +// are not immediately before Succ must have an unconditional branch to +// Succ added (but the predecessor/successor lists need no adjustment). +// The lone predecessor of Succ that falls through into Succ, +// if any, is given in PredBB. + +bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + bool MadeChange = false; + + // Except for the special cases below, tail-merge if there are at least + // this many instructions in common. + unsigned minCommonTailLength = TailMergeSize; + + DEBUG(dbgs() << "\nTryTailMergeBlocks: "; + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", "); + dbgs() << "\n"; + if (SuccBB) { + dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n'; + if (PredBB) + dbgs() << " which has fall-through from BB#" + << PredBB->getNumber() << "\n"; + } + dbgs() << "Looking for common tails of at least " + << minCommonTailLength << " instruction" + << (minCommonTailLength == 1 ? "" : "s") << '\n'; + ); + + // Sort by hash value so that blocks with identical end sequences sort + // together. + std::stable_sort(MergePotentials.begin(), MergePotentials.end()); + + // Walk through equivalence sets looking for actual exact matches. + while (MergePotentials.size() > 1) { + unsigned CurHash = MergePotentials.back().getHash(); + + // Build SameTails, identifying the set of blocks with this hash code + // and with the maximum number of instructions in common. + unsigned maxCommonTailLength = ComputeSameTails(CurHash, + minCommonTailLength, + SuccBB, PredBB); + + // If we didn't find any pair that has at least minCommonTailLength + // instructions in common, remove all blocks with this hash code and retry. + if (SameTails.empty()) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } + + // If one of the blocks is the entire common tail (and not the entry + // block, which we can't jump to), we can treat all blocks with this same + // tail at once. Use PredBB if that is one of the possibilities, as that + // will not introduce any extra branches. + MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()-> + getParent()->begin(); + unsigned commonTailIndex = SameTails.size(); + // If there are two blocks, check to see if one can be made to fall through + // into the other. + if (SameTails.size() == 2 && + SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) && + SameTails[1].tailIsWholeBlock()) + commonTailIndex = 1; + else if (SameTails.size() == 2 && + SameTails[1].getBlock()->isLayoutSuccessor( + SameTails[0].getBlock()) && + SameTails[0].tailIsWholeBlock()) + commonTailIndex = 0; + else { + // Otherwise just pick one, favoring the fall-through predecessor if + // there is one. 
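+      // (The entry block is skipped when its tail spans the whole block,
+      // since nothing can jump to the entry block; whole-block tails are
+      // otherwise preferred because they avoid a split.)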
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { + MachineBasicBlock *MBB = SameTails[i].getBlock(); + if (MBB == EntryBB && SameTails[i].tailIsWholeBlock()) + continue; + if (MBB == PredBB) { + commonTailIndex = i; + break; + } + if (SameTails[i].tailIsWholeBlock()) + commonTailIndex = i; + } + } + + if (commonTailIndex == SameTails.size() || + (SameTails[commonTailIndex].getBlock() == PredBB && + !SameTails[commonTailIndex].tailIsWholeBlock())) { + // None of the blocks consist entirely of the common tail. + // Split a block so that one does. + if (!CreateCommonTailOnlyBlock(PredBB, + maxCommonTailLength, commonTailIndex)) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } + } + + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + // MBB is common tail. Adjust all other BB's to jump to this one. + // Traversal must be forwards so erases work. + DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() + << " for "); + for (unsigned int i=0, e = SameTails.size(); i != e; ++i) { + if (commonTailIndex == i) + continue; + DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", ")); + // Hack the end off BB i, making it jump to BB commonTailIndex instead. + ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); + // BB i is no longer a predecessor of SuccBB; remove it from the worklist. + MergePotentials.erase(SameTails[i].getMPIter()); + } + DEBUG(dbgs() << "\n"); + // We leave commonTailIndex in the worklist in case there are other blocks + // that match it with a smaller number of instructions. + MadeChange = true; + } + return MadeChange; +} + +bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { + bool MadeChange = false; + if (!EnableTailMerge) return MadeChange; + + // First find blocks with no successors. + MergePotentials.clear(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E && MergePotentials.size() < TailMergeThreshold; ++I) { + if (TriedMerging.count(I)) + continue; + if (I->succ_empty()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); + } + + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + + // See if we can do any tail merging on those. + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(NULL, NULL); + + // Look at blocks (IBB) with multiple predecessors (PBB). + // We change each predecessor to a canonical form, by + // (1) temporarily removing any unconditional branch from the predecessor + // to IBB, and + // (2) alter conditional branches so they branch to the other block + // not IBB; this may require adding back an unconditional branch to IBB + // later, where there wasn't one coming in. E.g. + // Bcc IBB + // fallthrough to QBB + // here becomes + // Bncc QBB + // with a conceptual B to IBB after that, which never actually exists. + // With those changes, we see whether the predecessors' tails match, + // and merge them if so. We change things out of canonical form and + // back to the way they were later in the process. (OptimizeBranches + // would undo some of this, but we can't use it, because we'd get into + // a compile-time infinite loop repeatedly doing and undoing the same + // transformations.) 
+ + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + I != E; ++I) { + if (I->pred_size() < 2) continue; + SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; + MachineBasicBlock *IBB = I; + MachineBasicBlock *PredBB = prior(I); + MergePotentials.clear(); + for (MachineBasicBlock::pred_iterator P = I->pred_begin(), + E2 = I->pred_end(); + P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { + MachineBasicBlock *PBB = *P; + if (TriedMerging.count(PBB)) + continue; + + // Skip blocks that loop to themselves, can't tail merge these. + if (PBB == IBB) + continue; + + // Visit each predecessor only once. + if (!UniquePreds.insert(PBB)) + continue; + + // Skip blocks which may jump to a landing pad. Can't tail merge these. + if (PBB->getLandingPadSuccessor()) + continue; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { + // Failing case: IBB is the target of a cbr, and we cannot reverse the + // branch. + SmallVector<MachineOperand, 4> NewCond(Cond); + if (!Cond.empty() && TBB == IBB) { + if (TII->ReverseBranchCondition(NewCond)) + continue; + // This is the QBB case described above + if (!FBB) + FBB = llvm::next(MachineFunction::iterator(PBB)); + } + + // Failing case: the only way IBB can be reached from PBB is via + // exception handling. Happens for landing pads. Would be nice to have + // a bit in the edge so we didn't have to do all this. + if (IBB->isLandingPad()) { + MachineFunction::iterator IP = PBB; IP++; + MachineBasicBlock *PredNextBB = NULL; + if (IP != MF.end()) + PredNextBB = IP; + if (TBB == NULL) { + if (IBB != PredNextBB) // fallthrough + continue; + } else if (FBB) { + if (TBB != IBB && FBB != IBB) // cbr then ubr + continue; + } else if (Cond.empty()) { + if (TBB != IBB) // ubr + continue; + } else { + if (TBB != IBB && IBB != PredNextBB) // cbr + continue; + } + } + + // Remove the unconditional branch at the end, if any. + if (TBB && (Cond.empty() || FBB)) { + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PBB); + if (!Cond.empty()) + // reinsert conditional branch only, for now + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); + } + + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); + } + } + + // If this is a large problem, avoid visiting the same basic blocks multiple + // times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(IBB, PredBB); + + // Reinsert an unconditional branch if needed. The 1 below can occur as a + // result of removing blocks in TryTailMergeBlocks. 
+  PredBB = prior(I);     // this may have been changed in TryTailMergeBlocks
+  if (MergePotentials.size() == 1 &&
+      MergePotentials.begin()->getBlock() != PredBB)
+    FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
+  }
+
+  return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+//  Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+  bool MadeChange = false;
+
+  // Make sure blocks are numbered in order.
+  MF.RenumberBlocks();
+
+  for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+       I != E; ) {
+    MachineBasicBlock *MBB = I++;
+    MadeChange |= OptimizeBlock(MBB);
+
+    // If it is dead, remove it.
+    if (MBB->pred_empty()) {
+      RemoveDeadBlock(MBB);
+      MadeChange = true;
+      ++NumDeadBlocks;
+    }
+  }
+  return MadeChange;
+}
+
+// Blocks should be considered empty if they contain only debug info;
+// else the debug info would affect codegen.
+static bool IsEmptyBlock(MachineBasicBlock *MBB) {
+  if (MBB->empty())
+    return true;
+  for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+       MBBI != MBBE; ++MBBI) {
+    if (!MBBI->isDebugValue())
+      return false;
+  }
+  return true;
+}
+
+// Blocks with only debug info and branches should be considered the same
+// as blocks with only branches.
+static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
+  MachineBasicBlock::iterator MBBI, MBBE;
+  for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE; ++MBBI) {
+    if (!MBBI->isDebugValue())
+      break;
+  }
+  return (MBBI->isBranch());
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1)
+/// will result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+                                MachineBasicBlock *MBB2) {
+  // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+  // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+  // optimize branches that branch to either a return block or an assert block
+  // into a fallthrough to the return.
+  if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false;
+
+  // If there is a clear successor ordering we make sure that one block
+  // will fall through to the next.
+  if (MBB1->isSuccessor(MBB2)) return true;
+  if (MBB2->isSuccessor(MBB1)) return false;
+
+  // Neither block consists entirely of debug info (per IsEmptyBlock check),
+  // so we needn't test for falling off the beginning here.
+  MachineBasicBlock::iterator MBB1I = --MBB1->end();
+  while (MBB1I->isDebugValue())
+    --MBB1I;
+  MachineBasicBlock::iterator MBB2I = --MBB2->end();
+  while (MBB2I->isDebugValue())
+    --MBB2I;
+  return MBB2I->isCall() && !MBB1I->isCall();
+}
+
+/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
+/// instructions in the block. Always use the DebugLoc of the first
+/// branching instruction found unless it's absent, in which case use the
+/// DebugLoc of the second if present.
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin())
+    return DebugLoc();
+  --I;
+  while (I->isDebugValue() && I != MBB.begin())
+    --I;
+  if (I->isBranch())
+    return I->getDebugLoc();
+  return DebugLoc();
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block.
This is never called on the entry block. +bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { + bool MadeChange = false; + MachineFunction &MF = *MBB->getParent(); +ReoptimizeBlock: + + MachineFunction::iterator FallThrough = MBB; + ++FallThrough; + + // If this block is empty, make everyone use its fall-through, not the block + // explicitly. Landing pads should not do this since the landing-pad table + // points to this block. Blocks with their addresses taken shouldn't be + // optimized away. + if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) { + // Dead block? Leave for cleanup later. + if (MBB->pred_empty()) return MadeChange; + + if (FallThrough == MF.end()) { + // TODO: Simplify preds to not branch here if possible! + } else { + // Rewrite all predecessors of the old block to go to the fallthrough + // instead. + while (!MBB->pred_empty()) { + MachineBasicBlock *Pred = *(MBB->pred_end()-1); + Pred->ReplaceUsesOfBlockWith(MBB, FallThrough); + } + // If MBB was the target of a jump table, update jump tables to go to the + // fallthrough instead. + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, FallThrough); + MadeChange = true; + } + return MadeChange; + } + + // Check to see if we can simplify the terminator of the block before this + // one. + MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB)); + + MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + SmallVector<MachineOperand, 4> PriorCond; + bool PriorUnAnalyzable = + TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); + if (!PriorUnAnalyzable) { + // If the CFG for the prior block has extra edges, remove them. + MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB, + !PriorCond.empty()); + + // If the previous branch is conditional and both conditions go to the same + // destination, remove the branch, replacing it with an unconditional one or + // a fall-through. + if (PriorTBB && PriorTBB == PriorFBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); + TII->RemoveBranch(PrevBB); + PriorCond.clear(); + if (PriorTBB != MBB) + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + + // If the previous block unconditionally falls through to this block and + // this block has no other predecessors, move the contents of this block + // into the prior block. This doesn't usually happen when SimplifyCFG + // has been used, but it can happen if tail merging splits a fall-through + // predecessor of a block. + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && + PrevBB.succ_size() == 1 && + !MBB->hasAddressTaken() && !MBB->isLandingPad()) { + DEBUG(dbgs() << "\nMerging into block: " << PrevBB + << "From MBB: " << *MBB); + // Remove redundant DBG_VALUEs first. + if (PrevBB.begin() != PrevBB.end()) { + MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); + --PrevBBIter; + MachineBasicBlock::iterator MBBIter = MBB->begin(); + // Check if DBG_VALUE at the end of PrevBB is identical to the + // DBG_VALUE at the beginning of MBB. 
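+        // Matching pairs are erased from MBB so the splice below does not
+        // duplicate them.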
+ while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end() + && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) { + if (!MBBIter->isIdenticalTo(PrevBBIter)) + break; + MachineInstr *DuplicateDbg = MBBIter; + ++MBBIter; -- PrevBBIter; + DuplicateDbg->eraseFromParent(); + } + } + PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); + PrevBB.removeSuccessor(PrevBB.succ_begin()); + assert(PrevBB.succ_empty()); + PrevBB.transferSuccessors(MBB); + MadeChange = true; + return MadeChange; + } + + // If the previous branch *only* branches to *this* block (conditional or + // not) remove the branch. + if (PriorTBB == MBB && PriorFBB == 0) { + TII->RemoveBranch(PrevBB); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + + // If the prior block branches somewhere else on the condition and here if + // the condition is false, remove the uncond second branch. + if (PriorFBB == MBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + + // If the prior block branches here on true and somewhere else on false, and + // if the branch condition is reversible, reverse the branch to create a + // fall-through. + if (PriorTBB == MBB) { + SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); + if (!TII->ReverseBranchCondition(NewPriorCond)) { + DebugLoc dl = getBranchDebugLoc(PrevBB); + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + } + + // If this block has no successors (e.g. it is a return block or ends with + // a call to a no-return function like abort or __cxa_throw) and if the pred + // falls through into this block, and if it would otherwise fall through + // into the block after this, move this block to the end of the function. + // + // We consider it more likely that execution will stay in the function (e.g. + // due to loops) than it is to exit it. This asserts in loops etc, moving + // the assert condition out of the loop body. + if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 && + MachineFunction::iterator(PriorTBB) == FallThrough && + !MBB->canFallThrough()) { + bool DoTransform = true; + + // We have to be careful that the succs of PredBB aren't both no-successor + // blocks. If neither have successors and if PredBB is the second from + // last block in the function, we'd just keep swapping the two blocks for + // last. Only do the swap if one is clearly better to fall through than + // the other. + if (FallThrough == --MF.end() && + !IsBetterFallthrough(PriorTBB, MBB)) + DoTransform = false; + + if (DoTransform) { + // Reverse the branch so we will fall through on the previous true cond. + SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); + if (!TII->ReverseBranchCondition(NewPriorCond)) { + DEBUG(dbgs() << "\nMoving MBB: " << *MBB + << "To make fallthrough to: " << *PriorTBB << "\n"); + + DebugLoc dl = getBranchDebugLoc(PrevBB); + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); + + // Move this block to the end of the function. + MBB->moveAfter(--MF.end()); + MadeChange = true; + ++NumBranchOpts; + return MadeChange; + } + } + } + } + + // Analyze the branch in the current block. 
+ MachineBasicBlock *CurTBB = 0, *CurFBB = 0; + SmallVector<MachineOperand, 4> CurCond; + bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); + if (!CurUnAnalyzable) { + // If the CFG for the prior block has extra edges, remove them. + MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty()); + + // If this is a two-way branch, and the FBB branches to this block, reverse + // the condition so the single-basic-block loop is faster. Instead of: + // Loop: xxx; jcc Out; jmp Loop + // we want: + // Loop: xxx; jncc Loop; jmp Out + if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { + SmallVector<MachineOperand, 4> NewCond(CurCond); + if (!TII->ReverseBranchCondition(NewCond)) { + DebugLoc dl = getBranchDebugLoc(*MBB); + TII->RemoveBranch(*MBB); + TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + } + + // If this branch is the only thing in its block, see if we can forward + // other blocks across it. + if (CurTBB && CurCond.empty() && CurFBB == 0 && + IsBranchOnlyBlock(MBB) && CurTBB != MBB && + !MBB->hasAddressTaken()) { + DebugLoc dl = getBranchDebugLoc(*MBB); + // This block may contain just an unconditional branch. Because there can + // be 'non-branch terminators' in the block, try removing the branch and + // then seeing if the block is empty. + TII->RemoveBranch(*MBB); + // If the only things remaining in the block are debug info, remove these + // as well, so this will behave the same as an empty block in non-debug + // mode. + if (!MBB->empty()) { + bool NonDebugInfoFound = false; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (!I->isDebugValue()) { + NonDebugInfoFound = true; + break; + } + } + if (!NonDebugInfoFound) + // Make the block empty, losing the debug info (we could probably + // improve this in some cases.) + MBB->erase(MBB->begin(), MBB->end()); + } + // If this block is just an unconditional branch to CurTBB, we can + // usually completely eliminate the block. The only case we cannot + // completely eliminate the block is when the block before this one + // falls through into MBB and we can't understand the prior block's branch + // condition. + if (MBB->empty()) { + bool PredHasNoFallThrough = !PrevBB.canFallThrough(); + if (PredHasNoFallThrough || !PriorUnAnalyzable || + !PrevBB.isSuccessor(MBB)) { + // If the prior block falls through into us, turn it into an + // explicit branch to us to make updates simpler. + if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && + PriorTBB != MBB && PriorFBB != MBB) { + if (PriorTBB == 0) { + assert(PriorCond.empty() && PriorFBB == 0 && + "Bad branch analysis"); + PriorTBB = MBB; + } else { + assert(PriorFBB == 0 && "Machine CFG out of date!"); + PriorFBB = MBB; + } + DebugLoc pdl = getBranchDebugLoc(PrevBB); + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); + } + + // Iterate through all the predecessors, revectoring each in-turn. + size_t PI = 0; + bool DidChange = false; + bool HasBranchToSelf = false; + while(PI != MBB->pred_size()) { + MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI); + if (PMBB == MBB) { + // If this block has an uncond branch to itself, leave it. 
+ ++PI; + HasBranchToSelf = true; + } else { + DidChange = true; + PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB); + // If this change resulted in PMBB ending in a conditional + // branch where both conditions go to the same destination, + // change this to an unconditional branch (and fix the CFG). + MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0; + SmallVector<MachineOperand, 4> NewCurCond; + bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, + NewCurFBB, NewCurCond, true); + if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { + DebugLoc pdl = getBranchDebugLoc(*PMBB); + TII->RemoveBranch(*PMBB); + NewCurCond.clear(); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); + MadeChange = true; + ++NumBranchOpts; + PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); + } + } + } + + // Change any jumptables to go to the new MBB. + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, CurTBB); + if (DidChange) { + ++NumBranchOpts; + MadeChange = true; + if (!HasBranchToSelf) return MadeChange; + } + } + } + + // Add the branch back if the block is more than just an uncond branch. + TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl); + } + } + + // If the prior block doesn't fall through into this block, and if this + // block doesn't fall through into some other block, see if we can find a + // place to move this block where a fall-through will happen. + if (!PrevBB.canFallThrough()) { + + // Now we know that there was no fall-through into this block, check to + // see if it has a fall-through into its successor. + bool CurFallsThru = MBB->canFallThrough(); + + if (!MBB->isLandingPad()) { + // Check all the predecessors of this block. If one of them has no fall + // throughs, move this block right after it. + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + E = MBB->pred_end(); PI != E; ++PI) { + // Analyze the branch at the end of the pred. + MachineBasicBlock *PredBB = *PI; + MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; + MachineBasicBlock *PredTBB = 0, *PredFBB = 0; + SmallVector<MachineOperand, 4> PredCond; + if (PredBB != MBB && !PredBB->canFallThrough() && + !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) + && (!CurFallsThru || !CurTBB || !CurFBB) + && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { + // If the current block doesn't fall through, just move it. + // If the current block can fall through and does not end with a + // conditional branch, we need to append an unconditional jump to + // the (current) next block. To avoid a possible compile-time + // infinite loop, move blocks only backward in this case. + // Also, if there are already 2 branches here, we cannot add a third; + // this means we have the case + // Bcc next + // B elsewhere + // next: + if (CurFallsThru) { + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + CurCond.clear(); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); + } + MBB->moveAfter(PredBB); + MadeChange = true; + goto ReoptimizeBlock; + } + } + } + + if (!CurFallsThru) { + // Check all successors to see if we can move this block before it. + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + // Analyze the branch at the end of the block before the succ. 
+        MachineBasicBlock *SuccBB = *SI;
+        MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
+
+        // If this block doesn't already fall-through to that successor, and if
+        // the succ doesn't already have a block that can fall through into it,
+        // and if the successor isn't an EH destination, we can arrange for the
+        // fallthrough to happen.
+        if (SuccBB != MBB && &*SuccPrev != MBB &&
+            !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
+            !SuccBB->isLandingPad()) {
+          MBB->moveBefore(SuccBB);
+          MadeChange = true;
+          goto ReoptimizeBlock;
+        }
+      }
+
+      // Okay, there is no really great place to put this block. If, however,
+      // the block before this one would be a fall-through if this block were
+      // removed, move this block to the end of the function.
+      MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0;
+      SmallVector<MachineOperand, 4> PrevCond;
+      if (FallThrough != MF.end() &&
+          !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
+          PrevBB.isSuccessor(FallThrough)) {
+        MBB->moveAfter(--MF.end());
+        MadeChange = true;
+        return MadeChange;
+      }
+    }
+  }
+
+  return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+//  Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+/// HoistCommonCode - Hoist common instruction sequences at the start of basic
+/// blocks to their common predecessor.
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+  bool MadeChange = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
+    MachineBasicBlock *MBB = I++;
+    MadeChange |= HoistCommonCodeInSuccs(MBB);
+  }
+
+  return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+                                         MachineBasicBlock *TrueBB) {
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+         E = BB->succ_end(); SI != E; ++SI) {
+    MachineBasicBlock *SuccBB = *SI;
+    if (SuccBB != TrueBB)
+      return SuccBB;
+  }
+  return NULL;
+}
+
+/// findHoistingInsertPosAndDeps - Find the location to move common
+/// instructions in successors to. The location is usually just before the
+/// terminator; however, if the terminator is a conditional branch and its
+/// previous instruction is the flag-setting instruction, the previous
+/// instruction is the preferred location. This function also gathers uses and
+/// defs of the instructions from the insertion point to the end of the block.
+/// The data is used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+                                                  const TargetInstrInfo *TII,
+                                                  const TargetRegisterInfo *TRI,
+                                                  SmallSet<unsigned,4> &Uses,
+                                                  SmallSet<unsigned,4> &Defs) {
+  MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+  if (!TII->isUnpredicatedTerminator(Loc))
+    return MBB->end();
+
+  for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = Loc->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isUse()) {
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        Uses.insert(*AI);
+    } else if (!MO.isDead())
+      // Don't try to hoist code in the rare case the terminator defines a
+      // register that is later used.
+      return MBB->end();
+  }
+
+  if (Uses.empty())
+    return Loc;
+  if (Loc == MBB->begin())
+    return MBB->end();
+
+  // The terminator is probably a conditional branch; try not to separate the
+  // branch from the condition-setting instruction.
+  MachineBasicBlock::iterator PI = Loc;
+  --PI;
+  while (PI != MBB->begin() && PI->isDebugValue())
+    --PI;
+
+  bool IsDef = false;
+  for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
+    const MachineOperand &MO = PI->getOperand(i);
+    // If PI has a regmask operand, it is probably a call. Separate away.
+    if (MO.isRegMask())
+      return Loc;
+    if (!MO.isReg() || MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (Uses.count(Reg))
+      IsDef = true;
+  }
+  if (!IsDef)
+    // The condition-setting instruction is not just before the conditional
+    // branch.
+    return Loc;
+
+  // Be conservative: don't insert an instruction above something that may have
+  // side-effects. And since it's potentially bad to separate the flag-setting
+  // instruction from the conditional branch, just abort the optimization
+  // completely.
+  // Also avoid moving code above a predicated instruction since it's hard to
+  // reason about register liveness with predicated instructions.
+  bool DontMoveAcrossStore = true;
+  if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
+      TII->isPredicated(PI))
+    return MBB->end();
+
+
+  // Find out what registers are live. Note this routine is ignoring other live
+  // registers which are only used by instructions in successor blocks.
+  for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = PI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isUse()) {
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        Uses.insert(*AI);
+    } else {
+      if (Uses.erase(Reg)) {
+        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+          Uses.erase(*SubRegs); // Use sub-registers to be conservative.
+      }
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        Defs.insert(*AI);
+    }
+  }
+
+  return PI;
+}
+
+/// HoistCommonCodeInSuccs - If the successors of MBB have a common instruction
+/// sequence at their start, move the instructions before MBB's terminator if
+/// it's legal.
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+    return false;
+
+  if (!FBB) FBB = findFalseBlock(MBB, TBB);
+  if (!FBB)
+    // Malformed bcc? True and false blocks are the same?
+    return false;
+
+  // Restrict the optimization to cases where MBB is the only predecessor;
+  // there it is an obvious win.
+  if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+    return false;
+
+  // Find a suitable position to hoist the common instructions to. Also figure
+  // out which registers are used or defined by instructions from the insertion
+  // point to the end of the block.
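+  // (An instruction is hoisted only if it neither clobbers a register in Uses
+  // nor reads or redefines a register in Defs; see the checks below.)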
+  SmallSet<unsigned, 4> Uses, Defs;
+  MachineBasicBlock::iterator Loc =
+    findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+  if (Loc == MBB->end())
+    return false;
+
+  bool HasDups = false;
+  SmallVector<unsigned, 4> LocalDefs;
+  SmallSet<unsigned, 4> LocalDefsSet;
+  MachineBasicBlock::iterator TIB = TBB->begin();
+  MachineBasicBlock::iterator FIB = FBB->begin();
+  MachineBasicBlock::iterator TIE = TBB->end();
+  MachineBasicBlock::iterator FIE = FBB->end();
+  while (TIB != TIE && FIB != FIE) {
+    // Skip dbg_value instructions. These do not count.
+    if (TIB->isDebugValue()) {
+      while (TIB != TIE && TIB->isDebugValue())
+        ++TIB;
+      if (TIB == TIE)
+        break;
+    }
+    if (FIB->isDebugValue()) {
+      while (FIB != FIE && FIB->isDebugValue())
+        ++FIB;
+      if (FIB == FIE)
+        break;
+    }
+    if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead))
+      break;
+
+    if (TII->isPredicated(TIB))
+      // Hard to reason about register liveness with predicated instructions.
+      break;
+
+    bool IsSafe = true;
+    for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = TIB->getOperand(i);
+      // Don't attempt to hoist instructions with register masks.
+      if (MO.isRegMask()) {
+        IsSafe = false;
+        break;
+      }
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      if (MO.isDef()) {
+        if (Uses.count(Reg)) {
+          // Avoid clobbering a register that's used by the instruction at
+          // the point of insertion.
+          IsSafe = false;
+          break;
+        }
+
+        if (Defs.count(Reg) && !MO.isDead()) {
+          // Don't hoist the instruction if the def would be clobbered by the
+          // instruction at the point of insertion. FIXME: This is overly
+          // conservative. It should be possible to hoist the instructions
+          // in BB2 in the following example:
+          // BB1:
+          // r1, eflag = op1 r2, r3
+          // brcc eflag
+          //
+          // BB2:
+          // r1 = op2, ...
+          //    = op3, r1<kill>
+          IsSafe = false;
+          break;
+        }
+      } else if (!LocalDefsSet.count(Reg)) {
+        if (Defs.count(Reg)) {
+          // Use is defined by the instruction at the point of insertion.
+          IsSafe = false;
+          break;
+        }
+
+        if (MO.isKill() && Uses.count(Reg))
+          // Kills a register that's read by the instruction at the point of
+          // insertion. Remove the kill marker.
+          MO.setIsKill(false);
+      }
+    }
+    if (!IsSafe)
+      break;
+
+    bool DontMoveAcrossStore = true;
+    if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
+      break;
+
+    // Remove kills from LocalDefsSet; these registers had short live ranges.
+    for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = TIB->getOperand(i);
+      if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg || !LocalDefsSet.count(Reg))
+        continue;
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        LocalDefsSet.erase(*AI);
+    }
+
+    // Track local defs so we can update live-ins.
+    for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = TIB->getOperand(i);
+      if (!MO.isReg() || !MO.isDef() || MO.isDead())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      LocalDefs.push_back(Reg);
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        LocalDefsSet.insert(*AI);
+    }
+
+    HasDups = true;
+    ++TIB;
+    ++FIB;
+  }
+
+  if (!HasDups)
+    return false;
+
+  MBB->splice(Loc, TBB, TBB->begin(), TIB);
+  FBB->erase(FBB->begin(), FIB);
+
+  // Update live-ins.
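+  // Registers defined by the hoisted sequence become live into both
+  // successors.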
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Def = LocalDefs[i]; + if (LocalDefsSet.count(Def)) { + TBB->addLiveIn(Def); + FBB->addLiveIn(Def); + } + } + + ++NumHoist; + return true; +} diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h new file mode 100644 index 0000000..df795df --- /dev/null +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -0,0 +1,123 @@ +//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP +#define LLVM_CODEGEN_BRANCHFOLDING_HPP + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include <vector> + +namespace llvm { + class MachineFunction; + class MachineModuleInfo; + class RegScavenger; + class TargetInstrInfo; + class TargetRegisterInfo; + + class BranchFolder { + public: + explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist); + + bool OptimizeFunction(MachineFunction &MF, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + MachineModuleInfo *mmi); + private: + class MergePotentialsElt { + unsigned Hash; + MachineBasicBlock *Block; + public: + MergePotentialsElt(unsigned h, MachineBasicBlock *b) + : Hash(h), Block(b) {} + + unsigned getHash() const { return Hash; } + MachineBasicBlock *getBlock() const { return Block; } + + void setBlock(MachineBasicBlock *MBB) { + Block = MBB; + } + + bool operator<(const MergePotentialsElt &) const; + }; + typedef std::vector<MergePotentialsElt>::iterator MPIterator; + std::vector<MergePotentialsElt> MergePotentials; + SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging; + + class SameTailElt { + MPIterator MPIter; + MachineBasicBlock::iterator TailStartPos; + public: + SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp) + : MPIter(mp), TailStartPos(tsp) {} + + MPIterator getMPIter() const { + return MPIter; + } + MergePotentialsElt &getMergePotentialsElt() const { + return *getMPIter(); + } + MachineBasicBlock::iterator getTailStartPos() const { + return TailStartPos; + } + unsigned getHash() const { + return getMergePotentialsElt().getHash(); + } + MachineBasicBlock *getBlock() const { + return getMergePotentialsElt().getBlock(); + } + bool tailIsWholeBlock() const { + return TailStartPos == getBlock()->begin(); + } + + void setBlock(MachineBasicBlock *MBB) { + getMergePotentialsElt().setBlock(MBB); + } + void setTailStartPos(MachineBasicBlock::iterator Pos) { + TailStartPos = Pos; + } + }; + std::vector<SameTailElt> SameTails; + + bool EnableTailMerge; + bool EnableHoistCommonCode; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineModuleInfo *MMI; + RegScavenger *RS; + + bool TailMergeBlocks(MachineFunction &MF); + bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + void MaintainLiveIns(MachineBasicBlock *CurMBB, + MachineBasicBlock *NewMBB); + void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest); + MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1); + unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB); + void RemoveBlocksWithHash(unsigned CurHash, 
MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex); + + bool OptimizeBranches(MachineFunction &MF); + bool OptimizeBlock(MachineBasicBlock *MBB); + void RemoveDeadBlock(MachineBasicBlock *MBB); + bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); + + bool HoistCommonCode(MachineFunction &MF); + bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); + }; +} + +#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */ diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp new file mode 100644 index 0000000..dee339a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -0,0 +1,202 @@ +//===------------------------ CalcSpillWeights.cpp ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "calcspillweights" + +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +char CalculateSpillWeights::ID = 0; +INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights", + "Calculate spill weights", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights", + "Calculate spill weights", false, false) + +void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<LiveIntervals>(); + au.addRequired<MachineLoopInfo>(); + au.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(au); +} + +bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { + + DEBUG(dbgs() << "********** Compute Spill Weights **********\n" + << "********** Function: " << MF.getName() << '\n'); + + LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>()); + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI.reg_nodbg_empty(Reg)) + continue; + VRAI.CalculateWeightAndHint(LIS.getInterval(Reg)); + } + return false; +} + +// Return the preferred allocation register for reg, given a COPY instruction. +static unsigned copyHint(const MachineInstr *mi, unsigned reg, + const TargetRegisterInfo &tri, + const MachineRegisterInfo &mri) { + unsigned sub, hreg, hsub; + if (mi->getOperand(0).getReg() == reg) { + sub = mi->getOperand(0).getSubReg(); + hreg = mi->getOperand(1).getReg(); + hsub = mi->getOperand(1).getSubReg(); + } else { + sub = mi->getOperand(1).getSubReg(); + hreg = mi->getOperand(0).getReg(); + hsub = mi->getOperand(0).getSubReg(); + } + + if (!hreg) + return 0; + + if (TargetRegisterInfo::isVirtualRegister(hreg)) + return sub == hsub ? 
hreg : 0; + + const TargetRegisterClass *rc = mri.getRegClass(reg); + + // Only allow physreg hints in rc. + if (sub == 0) + return rc->contains(hreg) ? hreg : 0; + + // reg:sub should match the physreg hreg. + return tri.getMatchingSuperReg(hreg, sub, rc); +} + +// Check if all values in LI are rematerializable +static bool isRematerializable(const LiveInterval &LI, + const LiveIntervals &LIS, + const TargetInstrInfo &TII) { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + const VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + if (VNI->isPHIDef()) + return false; + + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Dead valno in interval"); + + if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis())) + return false; + } + return true; +} + +void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { + MachineRegisterInfo &mri = MF.getRegInfo(); + const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); + MachineBasicBlock *mbb = 0; + MachineLoop *loop = 0; + unsigned loopDepth = 0; + bool isExiting = false; + float totalWeight = 0; + SmallPtrSet<MachineInstr*, 8> visited; + + // Find the best physreg hist and the best virtreg hint. + float bestPhys = 0, bestVirt = 0; + unsigned hintPhys = 0, hintVirt = 0; + + // Don't recompute a target specific hint. + bool noHint = mri.getRegAllocationHint(li.reg).first != 0; + + // Don't recompute spill weight for an unspillable register. + bool Spillable = li.isSpillable(); + + for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg); + MachineInstr *mi = I.skipInstruction();) { + if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) + continue; + if (!visited.insert(mi)) + continue; + + float weight = 1.0f; + if (Spillable) { + // Get loop info for mi. + if (mi->getParent() != mbb) { + mbb = mi->getParent(); + loop = Loops.getLoopFor(mbb); + loopDepth = loop ? loop->getLoopDepth() : 0; + isExiting = loop ? loop->isLoopExiting(mbb) : false; + } + + // Calculate instr weight. + bool reads, writes; + tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); + weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); + + // Give extra weight to what looks like a loop induction variable update. + if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) + weight *= 3; + + totalWeight += weight; + } + + // Get allocation hints from copies. + if (noHint || !mi->isCopy()) + continue; + unsigned hint = copyHint(mi, li.reg, tri, mri); + if (!hint) + continue; + float hweight = Hint[hint] += weight; + if (TargetRegisterInfo::isPhysicalRegister(hint)) { + if (hweight > bestPhys && mri.isAllocatable(hint)) + bestPhys = hweight, hintPhys = hint; + } else { + if (hweight > bestVirt) + bestVirt = hweight, hintVirt = hint; + } + } + + Hint.clear(); + + // Always prefer the physreg hint. + if (unsigned hint = hintPhys ? hintPhys : hintVirt) { + mri.setRegAllocationHint(li.reg, 0, hint); + // Weakly boost the spill weight of hinted registers. + totalWeight *= 1.01F; + } + + // If the live interval was already unspillable, leave it that way. + if (!Spillable) + return; + + // Mark li as unspillable if all live ranges are tiny. + if (li.isZeroLength(LIS.getSlotIndexes())) { + li.markNotSpillable(); + return; + } + + // If all of the definitions of the interval are re-materializable, + // it is a preferred candidate for spilling. + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. 
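+  // Halving the weight below makes an interval whose values can simply be + // recomputed look cheaper to spill than one that would need a reload from a + // stack slot. The final normalizeSpillWeight call then factors in the + // interval's size, so that long and short intervals compete for registers + // on roughly even terms.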
+ if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo())) + totalWeight *= 0.5F; + + li.weight = normalizeSpillWeight(totalWeight, li.getSize()); +} diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp new file mode 100644 index 0000000..f1d4ace --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -0,0 +1,180 @@ +//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CCState class, used for lowering and implementing +// calling conventions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, + const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs, + LLVMContext &C) + : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), + CallOrPrologue(Unknown) { + // No stack is used. + StackOffset = 0; + + clearFirstByValReg(); + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +// HandleByVal - Allocate space on the stack large enough to pass an argument +// by value. The size and alignment information of the argument is encoded in +// its parameter attribute. +void CCState::HandleByVal(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + MF.getFrameInfo()->ensureMaxAlignment(Align); + TM.getTargetLowering()->HandleByVal(this, Size, Align); + unsigned Offset = AllocateStack(Size, Align); + addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void CCState::MarkAllocated(unsigned Reg) { + for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + UsedRegs[*AI/32] |= 1 << (*AI&31); +} + +/// AnalyzeFormalArguments - Analyze an array of argument values, +/// incorporating info about the formals into this state. +void +CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + unsigned NumArgs = Ins.size(); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Formal argument #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << '\n'; +#endif + llvm_unreachable(0); + } + } +} + +/// CheckReturn - Analyze the return values of a function, returning true if +/// the return can be performed without sret-demotion, and false otherwise. 
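+/// When this returns false, the caller is expected to perform sret-demotion: +/// the function is rewritten to take a hidden struct-return pointer and store +/// its result through it instead of returning the value in registers.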
+bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + MVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + return false; + } + return true; +} + +/// AnalyzeReturn - Analyze the returned values of a return, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + MVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Return operand #" << i << " has unhandled type " + << EVT(VT).getEVTString() << '\n'; +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + unsigned NumOps = Outs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << '\n'; +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << '\n'; +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + MVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { +#ifndef NDEBUG + dbgs() << "Call result #" << i << " has unhandled type " + << EVT(VT).getEVTString() << '\n'; +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. +void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { +#ifndef NDEBUG + dbgs() << "Call result has unhandled type " + << EVT(VT).getEVTString() << '\n'; +#endif + llvm_unreachable(0); + } +} diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp new file mode 100644 index 0000000..35ec68d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -0,0 +1,77 @@ +//===-- CodeGen.cpp -------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the common initialization routines for the +// CodeGen library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeCodeGen - Initialize all passes linked into the CodeGen library. +void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeBasicTTIPass(Registry); + initializeBranchFolderPassPass(Registry); + initializeCalculateSpillWeightsPass(Registry); + initializeDeadMachineInstructionElimPass(Registry); + initializeEarlyIfConverterPass(Registry); + initializeExpandPostRAPass(Registry); + initializeExpandISelPseudosPass(Registry); + initializeFinalizeMachineBundlesPass(Registry); + initializeGCMachineCodeAnalysisPass(Registry); + initializeGCModuleInfoPass(Registry); + initializeIfConverterPass(Registry); + initializeLiveDebugVariablesPass(Registry); + initializeLiveIntervalsPass(Registry); + initializeLiveStacksPass(Registry); + initializeLiveVariablesPass(Registry); + initializeLocalStackSlotPassPass(Registry); + initializeMachineBlockFrequencyInfoPass(Registry); + initializeMachineBlockPlacementPass(Registry); + initializeMachineBlockPlacementStatsPass(Registry); + initializeMachineCopyPropagationPass(Registry); + initializeMachineCSEPass(Registry); + initializeMachineDominatorTreePass(Registry); + initializeMachinePostDominatorTreePass(Registry); + initializeMachineLICMPass(Registry); + initializeMachineLoopInfoPass(Registry); + initializeMachineModuleInfoPass(Registry); + initializeMachineSchedulerPass(Registry); + initializeMachineSinkingPass(Registry); + initializeMachineVerifierPassPass(Registry); + initializeOptimizePHIsPass(Registry); + initializePHIEliminationPass(Registry); + initializePeepholeOptimizerPass(Registry); + initializePostRASchedulerPass(Registry); + initializeProcessImplicitDefsPass(Registry); + initializePEIPass(Registry); + initializeRegisterCoalescerPass(Registry); + initializeSlotIndexesPass(Registry); + initializeStackProtectorPass(Registry); + initializeStackColoringPass(Registry); + initializeStackSlotColoringPass(Registry); + initializeStrongPHIEliminationPass(Registry); + initializeTailDuplicatePassPass(Registry); + initializeTargetPassConfigPass(Registry); + initializeTwoAddressInstructionPassPass(Registry); + initializeUnpackMachineBundlesPass(Registry); + initializeUnreachableBlockElimPass(Registry); + initializeUnreachableMachineBlockElimPass(Registry); + initializeVirtRegMapPass(Registry); + initializeVirtRegRewriterPass(Registry); + initializeLowerIntrinsicsPass(Registry); + initializeMachineFunctionPrinterPassPass(Registry); +} + +void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { + initializeCodeGen(*unwrap(R)); +} diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp new file mode 100644 index 0000000..0eb74a4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -0,0 +1,658 @@ +//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the CriticalAntiDepBreaker class, which +// implements register anti-dependence breaking along a blocks +// critical path during post-RA scheduler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "post-RA-sched" +#include "CriticalAntiDepBreaker.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +CriticalAntiDepBreaker:: +CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : + AntiDepBreaker(), MF(MFi), + MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), + TRI(MF.getTarget().getRegisterInfo()), + RegClassInfo(RCI), + Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)), + KillIndices(TRI->getNumRegs(), 0), + DefIndices(TRI->getNumRegs(), 0), + KeepRegs(TRI->getNumRegs(), false) {} + +CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { +} + +void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { + const unsigned BBSize = BB->size(); + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + // Clear out the register class data. + Classes[i] = static_cast<const TargetRegisterClass *>(0); + + // Initialize the indices to indicate that no registers are live. + KillIndices[i] = ~0u; + DefIndices[i] = BBSize; + } + + // Clear "do not change" set. + KeepRegs.reset(); + + bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); + + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BBSize; + DefIndices[Reg] = ~0u; + } + } + + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector Pristine = MFI->getPristineRegs(BB); + for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + if (!IsReturnBlock && !Pristine.test(*I)) continue; + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BBSize; + DefIndices[Reg] = ~0u; + } + } +} + +void CriticalAntiDepBreaker::FinishBlock() { + RegRefs.clear(); + KeepRegs.reset(); +} + +void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) { + if (MI->isDebugValue()) + return; + assert(Count < InsertPosIndex && "Instruction index out of expected range!"); + + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] != ~0u) { + // If Reg is currently live, then mark that it can't be renamed as + // we don't know the extent of its live-range anymore (now that it + // has been scheduled). 
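+      // (Setting a Classes[] entry to -1 cast to a pointer is this pass's + // sentinel for "live, but not safe to rename"; see the comment on Classes + // in CriticalAntiDepBreaker.h.)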
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = Count; + } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { + // Any register which was defined within the previous scheduling region + // may have been rescheduled and its lifetime may overlap with registers + // in ways not reflected in our current liveness state. For each such + // register, adjust the liveness state to be conservatively correct. + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + + // Move the def index to the end of the previous region, to reflect + // that the def could theoretically have been scheduled at the end. + DefIndices[Reg] = InsertPosIndex; + } + } + + PrescanInstruction(MI); + ScanInstruction(MI, Count); +} + +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static const SDep *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + const SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + return Next; +} + +void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { + // It's not safe to change register allocation for source operands of + // that have special allocation requirements. Also assume all registers + // used in a call must not be changed (ABI). + // FIXME: The issue with predicated instruction is more complex. We are being + // conservative here because the kill markers cannot be trusted after + // if-conversion: + // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... + // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || + TII->isPredicated(MI); + + // Scan the register operands for this instruction and update + // Classes and RegRefs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + const TargetRegisterClass *NewRC = 0; + + if (i < MI->getDesc().getNumOperands()) + NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + + // For now, only allow the register to be changed if its register + // class is consistent across all uses. + if (!Classes[Reg] && NewRC) + Classes[Reg] = NewRC; + else if (!NewRC || Classes[Reg] != NewRC) + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + + // Now check for aliases. + for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { + // If an alias of the reg is used during the live range, give up. 
+ // Note that this allows us to skip checking if AntiDepReg + // overlaps with any of the aliases, among other things. + unsigned AliasReg = *AI; + if (Classes[AliasReg]) { + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + } + } + + // If we're still willing to consider this register, note the reference. + if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) + RegRefs.insert(std::make_pair(Reg, &MO)); + + if (MO.isUse() && Special) { + if (!KeepRegs.test(Reg)) { + KeepRegs.set(Reg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + KeepRegs.set(*SubRegs); + } + } + } +} + +void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, + unsigned Count) { + // Update liveness. + // Proceeding upwards, registers that are defed but not used in this + // instruction are now dead. + + if (!TII->isPredicated(MI)) { + // Predicated defs are modeled as read + write, i.e. similar to two + // address updates. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + + if (MO.isRegMask()) + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) + if (MO.clobbersPhysReg(i)) { + DefIndices[i] = Count; + KillIndices[i] = ~0u; + KeepRegs.reset(i); + Classes[i] = 0; + RegRefs.erase(i); + } + + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + DefIndices[Reg] = Count; + KillIndices[Reg] = ~0u; + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + KeepRegs.reset(Reg); + Classes[Reg] = 0; + RegRefs.erase(Reg); + // Repeat, for all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubregReg = *SubRegs; + DefIndices[SubregReg] = Count; + KillIndices[SubregReg] = ~0u; + KeepRegs.reset(SubregReg); + Classes[SubregReg] = 0; + RegRefs.erase(SubregReg); + } + // Conservatively mark super-registers as unusable. + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) + Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1); + } + } + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isUse()) continue; + + const TargetRegisterClass *NewRC = 0; + if (i < MI->getDesc().getNumOperands()) + NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + + // For now, only allow the register to be changed if its register + // class is consistent across all uses. + if (!Classes[Reg] && NewRC) + Classes[Reg] = NewRC; + else if (!NewRC || Classes[Reg] != NewRC) + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + + RegRefs.insert(std::make_pair(Reg, &MO)); + + // It wasn't previously live but now it is, this is a kill. + if (KillIndices[Reg] == ~0u) { + KillIndices[Reg] = Count; + DefIndices[Reg] = ~0u; + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + } + // Repeat, for all aliases. 
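+    // An aliasing register that was not already live is killed by this use + // as well, even though it never appears explicitly in the operand list.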
+    for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; + if (KillIndices[AliasReg] == ~0u) { + KillIndices[AliasReg] = Count; + DefIndices[AliasReg] = ~0u; + } + } + } +} + +// Check all machine operands that reference the antidependent register, all of +// which must be replaced by NewReg. Return true if any of their parent +// instructions may clobber the new register. +// +// Note: AntiDepReg may be referenced by a two-address instruction such that +// its use operand is tied to a def operand. We guard against the case in which +// the two-address instruction also defines NewReg, as may happen with +// pre/postincrement loads. In this case, both the use and def operands are in +// RegRefs because the def is inserted by PrescanInstruction and not erased +// during ScanInstruction. So checking for an instruction with definitions of +// both NewReg and AntiDepReg covers it. +bool +CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned NewReg) +{ + for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I) { + MachineOperand *RefOper = I->second; + + // Don't allow the instruction defining AntiDepReg to earlyclobber its + // operands, in case they may be assigned to NewReg. In this case antidep + // breaking must fail, but it's too rare to bother optimizing. + if (RefOper->isDef() && RefOper->isEarlyClobber()) + return true; + + // Handle cases in which this instruction defines NewReg. + MachineInstr *MI = RefOper->getParent(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &CheckOper = MI->getOperand(i); + + if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg)) + return true; + + if (!CheckOper.isReg() || !CheckOper.isDef() || + CheckOper.getReg() != NewReg) + continue; + + // Don't allow the instruction to define NewReg and AntiDepReg. + // When AntiDepReg is renamed it will be an illegal op. + if (RefOper->isDef()) + return true; + + // Don't allow an instruction using AntiDepReg to be earlyclobbered by + // NewReg. + if (CheckOper.isEarlyClobber()) + return true; + + // Don't allow inline asm to define NewReg at all. Who knows what it's + // doing with it. + if (MI->isInlineAsm()) + return true; + } + } + return false; +} + +unsigned CriticalAntiDepBreaker:: +findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *RC, + SmallVector<unsigned, 2> &Forbid) +{ + ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC); + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned NewReg = Order[i]; + // Don't replace a register with itself. + if (NewReg == AntiDepReg) continue; + // Don't replace a register with one that was recently used to repair + // an anti-dependence with this AntiDepReg, because that would + // re-introduce that anti-dependence. + if (NewReg == LastNewReg) continue; + // If any instructions that define AntiDepReg also define the NewReg, it's + // not suitable. For example, an instruction with multiple definitions can + // result in this condition. + if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue; + // If NewReg is dead and NewReg's most recent def is not before + // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) + && "Kill and Def maps aren't consistent for AntiDepReg!"); + assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) + && "Kill and Def maps aren't consistent for NewReg!"); + if (KillIndices[NewReg] != ~0u || + Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) || + KillIndices[AntiDepReg] > DefIndices[NewReg]) + continue; + // If NewReg overlaps any of the forbidden registers, we can't use it. + bool Forbidden = false; + for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(), + ite = Forbid.end(); it != ite; ++it) + if (TRI->regsOverlap(NewReg, *it)) { + Forbidden = true; + break; + } + if (Forbidden) continue; + return NewReg; + } + + // No registers are free and available! + return 0; +} + +unsigned CriticalAntiDepBreaker:: +BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues) { + // The code below assumes that there is at least one instruction, + // so just duck out immediately if the block is empty. + if (SUnits.empty()) return 0; + + // Keep a map of the MachineInstr*'s back to the SUnit representing them. + // This is used for updating debug information. + // + // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap + DenseMap<MachineInstr*,const SUnit*> MISUnitMap; + + // Find the node at the bottom of the critical path. + const SUnit *Max = 0; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + const SUnit *SU = &SUnits[i]; + MISUnitMap[SU->getInstr()] = SU; + if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) + Max = SU; + } + +#ifndef NDEBUG + { + DEBUG(dbgs() << "Critical path has total latency " + << (Max->getDepth() + Max->Latency) << "\n"); + DEBUG(dbgs() << "Available regs:"); + for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] == ~0u) + DEBUG(dbgs() << " " << TRI->getName(Reg)); + } + DEBUG(dbgs() << '\n'); + } +#endif + + // Track progress along the critical path through the SUnit graph as we walk + // the instructions. + const SUnit *CriticalPathSU = Max; + MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); + + // Consider this pattern: + // A = ... + // ... = A + // A = ... + // ... = A + // A = ... + // ... = A + // A = ... + // ... = A + // There are three anti-dependencies here, and without special care, + // we'd break all of them using the same register: + // A = ... + // ... = A + // B = ... + // ... = B + // B = ... + // ... = B + // B = ... + // ... = B + // because at each anti-dependence, B is the first register that + // isn't A which is free. This re-introduces anti-dependencies + // at all but one of the original anti-dependencies that we were + // trying to break. To avoid this, keep track of the most recent + // register that each register was replaced with, avoid + // using it to repair an anti-dependence on the same register. + // This lets us produce this: + // A = ... + // ... = A + // B = ... + // ... = B + // C = ... + // ... = C + // B = ... + // ... = B + // This still has an anti-dependence on B, but at least it isn't on the + // original critical path. + // + // TODO: If we tracked more than one register here, we could potentially + // fix that remaining critical edge too. 
This is a little more involved, + // because unlike the most recent register, less recent registers should + // still be considered, though only if no other registers are available. + std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0); + + // Attempt to break anti-dependence edges on the critical path. Walk the + // instructions from the bottom up, tracking information about liveness + // as we go to help determine which registers are available. + unsigned Broken = 0; + unsigned Count = InsertPosIndex - 1; + for (MachineBasicBlock::iterator I = End, E = Begin; + I != E; --Count) { + MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + + // Check if this instruction has a dependence on the critical path that + // is an anti-dependence that we may be able to break. If it is, set + // AntiDepReg to the non-zero register associated with the anti-dependence. + // + // We limit our attention to the critical path as a heuristic to avoid + // breaking anti-dependence edges that aren't going to significantly + // impact the overall schedule. There are a limited number of registers + // and we want to save them for the important edges. + // + // TODO: Instructions with multiple defs could have multiple + // anti-dependencies. The current code here only knows how to break one + // edge per instruction. Note that we'd have to be able to break all of + // the anti-dependencies in an instruction in order to be effective. + unsigned AntiDepReg = 0; + if (MI == CriticalPathMI) { + if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) { + const SUnit *NextSU = Edge->getSUnit(); + + // Only consider anti-dependence edges. + if (Edge->getKind() == SDep::Anti) { + AntiDepReg = Edge->getReg(); + assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); + if (!MRI.isAllocatable(AntiDepReg)) + // Don't break anti-dependencies on non-allocatable registers. + AntiDepReg = 0; + else if (KeepRegs.test(AntiDepReg)) + // Don't break anti-dependencies if an use down below requires + // this exact register. + AntiDepReg = 0; + else { + // If the SUnit has other dependencies on the SUnit that it + // anti-depends on, don't bother breaking the anti-dependency + // since those edges would prevent such units from being + // scheduled past each other regardless. + // + // Also, if there are dependencies on other SUnits with the + // same register as the anti-dependency, don't attempt to + // break it. + for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(), + PE = CriticalPathSU->Preds.end(); P != PE; ++P) + if (P->getSUnit() == NextSU ? + (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : + (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { + AntiDepReg = 0; + break; + } + } + } + CriticalPathSU = NextSU; + CriticalPathMI = CriticalPathSU->getInstr(); + } else { + // We've reached the end of the critical path. + CriticalPathSU = 0; + CriticalPathMI = 0; + } + } + + PrescanInstruction(MI); + + SmallVector<unsigned, 2> ForbidRegs; + + // If MI's defs have a special allocation requirement, don't allow + // any def registers to be changed. Also assume all registers + // defined in a call must not be changed (ABI). + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || + TII->isPredicated(MI)) + // If this instruction's defs have special allocation requirement, don't + // break this anti-dependency. + AntiDepReg = 0; + else if (AntiDepReg) { + // If this instruction has a use of AntiDepReg, breaking it + // is invalid. 
If the instruction defines other registers, + // save a list of them so that we don't pick a new register + // that overlaps any of them. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { + AntiDepReg = 0; + break; + } + if (MO.isDef() && Reg != AntiDepReg) + ForbidRegs.push_back(Reg); + } + } + + // Determine AntiDepReg's register class, if it is live and is + // consistently used within a single class. + const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0; + assert((AntiDepReg == 0 || RC != NULL) && + "Register should be live if it's causing an anti-dependence!"); + if (RC == reinterpret_cast<TargetRegisterClass *>(-1)) + AntiDepReg = 0; + + // Look for a suitable register to use to break the anti-depenence. + // + // TODO: Instead of picking the first free register, consider which might + // be the best. + if (AntiDepReg != 0) { + std::pair<std::multimap<unsigned, MachineOperand *>::iterator, + std::multimap<unsigned, MachineOperand *>::iterator> + Range = RegRefs.equal_range(AntiDepReg); + if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second, + AntiDepReg, + LastNewReg[AntiDepReg], + RC, ForbidRegs)) { + DEBUG(dbgs() << "Breaking anti-dependence edge on " + << TRI->getName(AntiDepReg) + << " with " << RegRefs.count(AntiDepReg) << " references" + << " using " << TRI->getName(NewReg) << "!\n"); + + // Update the references to the old register to refer to the new + // register. + for (std::multimap<unsigned, MachineOperand *>::iterator + Q = Range.first, QE = Range.second; Q != QE; ++Q) { + Q->second->setReg(NewReg); + // If the SU for the instruction being updated has debug information + // related to the anti-dependency register, make sure to update that + // as well. + const SUnit *SU = MISUnitMap[Q->second->getParent()]; + if (!SU) continue; + for (DbgValueVector::iterator DVI = DbgValues.begin(), + DVE = DbgValues.end(); DVI != DVE; ++DVI) + if (DVI->second == Q->second->getParent()) + UpdateDbgValue(DVI->first, AntiDepReg, NewReg); + } + + // We just went back in time and modified history; the + // liveness information for the anti-dependence reg is now + // inconsistent. Set the state as if it were dead. + Classes[NewReg] = Classes[AntiDepReg]; + DefIndices[NewReg] = DefIndices[AntiDepReg]; + KillIndices[NewReg] = KillIndices[AntiDepReg]; + assert(((KillIndices[NewReg] == ~0u) != + (DefIndices[NewReg] == ~0u)) && + "Kill and Def maps aren't consistent for NewReg!"); + + Classes[AntiDepReg] = 0; + DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; + KillIndices[AntiDepReg] = ~0u; + assert(((KillIndices[AntiDepReg] == ~0u) != + (DefIndices[AntiDepReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + + RegRefs.erase(AntiDepReg); + LastNewReg[AntiDepReg] = NewReg; + ++Broken; + } + } + + ScanInstruction(MI, Count); + } + + return Broken; +} diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h new file mode 100644 index 0000000..df13dd3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -0,0 +1,110 @@ +//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the CriticalAntiDepBreaker class, which +// implements register anti-dependence breaking along a block's +// critical path during post-RA scheduling. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H +#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H + +#include "AntiDepBreaker.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include <map> + +namespace llvm { +class RegisterClassInfo; +class TargetInstrInfo; +class TargetRegisterInfo; + + class CriticalAntiDepBreaker : public AntiDepBreaker { + MachineFunction& MF; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const RegisterClassInfo &RegClassInfo; + + /// AllocatableSet - The set of allocatable registers. + /// We'll be ignoring anti-dependencies on non-allocatable registers, + /// because they may not be safe to break. + const BitVector AllocatableSet; + + /// Classes - For live regs that are only used in one register class in a + /// live range, the register class. If the register is not live, the + /// corresponding value is null. If the register is live but used in + /// multiple register classes, the corresponding value is -1 casted to a + /// pointer. + std::vector<const TargetRegisterClass*> Classes; + + /// RegRefs - Map registers to all their references within a live range. + std::multimap<unsigned, MachineOperand *> RegRefs; + typedef std::multimap<unsigned, MachineOperand *>::const_iterator + RegRefIter; + + /// KillIndices - The index of the most recent kill (proceeding bottom-up), + /// or ~0u if the register is not live. + std::vector<unsigned> KillIndices; + + /// DefIndices - The index of the most recent complete def (proceeding + /// bottom up), or ~0u if the register is live. + std::vector<unsigned> DefIndices; + + /// KeepRegs - A set of registers which are live and cannot be changed to + /// break anti-dependencies. + BitVector KeepRegs; + + public: + CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); + ~CriticalAntiDepBreaker(); + + /// Start - Initialize anti-dep breaking for a new basic block. + void StartBlock(MachineBasicBlock *BB); + + /// BreakAntiDependencies - Identify anti-dependencies along the critical + /// path of the ScheduleDAG and break them by renaming registers. + /// + unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues); + + /// Observe - Update liveness information to account for the current + /// instruction, which will not be scheduled. + /// + void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex); + + /// Finish - Finish anti-dep breaking for a basic block.
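+    /// Clears the per-block register-reference map and the KeepRegs set so + /// that no state leaks into the next block.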
+ void FinishBlock(); + + private: + void PrescanInstruction(MachineInstr *MI); + void ScanInstruction(MachineInstr *MI, unsigned Count); + bool isNewRegClobberedByRefs(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned NewReg); + unsigned findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *RC, + SmallVector<unsigned, 2> &Forbid); + }; +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp new file mode 100644 index 0000000..840a101 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -0,0 +1,225 @@ +//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This class implements a deterministic finite automaton (DFA) based +// packetizing mechanism for VLIW architectures. It provides APIs to +// determine whether there exists a legal mapping of instructions to +// functional unit assignments in a packet. The DFA is auto-generated from +// the target's Schedule.td file. +// +// A DFA consists of 3 major elements: states, inputs, and transitions. For +// the packetizing mechanism, the input is the set of instruction classes for +// a target. The state models all possible combinations of functional unit +// consumption for a given set of instructions in a packet. A transition +// models the addition of an instruction to a packet. In the DFA constructed +// by this class, if an instruction can be added to a packet, then a valid +// transition exists from the corresponding state. Invalid transitions +// indicate that the instruction cannot be added to the current packet. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], + const unsigned *SET): + InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), + DFAStateEntryTable(SET) {} + + +// +// ReadTable - Read the DFA transition table and update CachedTable. +// +// Format of the transition tables: +// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid +// transitions +// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable +// for the ith state +// +void DFAPacketizer::ReadTable(unsigned int state) { + unsigned ThisState = DFAStateEntryTable[state]; + unsigned NextStateInTable = DFAStateEntryTable[state+1]; + // Early exit in case CachedTable has already contains this + // state's transitions. + if (CachedTable.count(UnsignPair(state, + DFAStateInputTable[ThisState][0]))) + return; + + for (unsigned i = ThisState; i < NextStateInTable; i++) + CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = + DFAStateInputTable[i][1]; +} + + +// canReserveResources - Check if the resources occupied by a MCInstrDesc +// are available in the current state. 
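+// +// For example, given the hypothetical tables (invented here purely for +// illustration, not generated from any real Schedule.td): +// DFAStateEntryTable = { 0, 2, 3 } +// DFAStateInputTable = { {SLOT0, 1}, {SLOT1, 2}, {SLOT0|SLOT1, 3} } +// state 0 owns entries [0,2): it can accept an instruction needing SLOT0 +// (moving to state 1) or one needing SLOT1 (moving to state 2), while state 1 +// owns entry [2,3) and only accepts SLOT0|SLOT1, moving to the saturated +// state 3.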
+bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); + unsigned FuncUnits = IS->getUnits(); + UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + ReadTable(CurrentState); + return (CachedTable.count(StateTrans) != 0); +} + + +// reserveResources - Reserve the resources occupied by a MCInstrDesc and +// change the current state to reflect that change. +void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); + unsigned FuncUnits = IS->getUnits(); + UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + ReadTable(CurrentState); + assert(CachedTable.count(StateTrans) != 0); + CurrentState = CachedTable[StateTrans]; +} + + +// canReserveResources - Check if the resources occupied by a machine +// instruction are available in the current state. +bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) { + const llvm::MCInstrDesc &MID = MI->getDesc(); + return canReserveResources(&MID); +} + +// reserveResources - Reserve the resources occupied by a machine +// instruction and change the current state to reflect that change. +void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) { + const llvm::MCInstrDesc &MID = MI->getDesc(); + reserveResources(&MID); +} + +namespace llvm { +// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides +// Schedule method to build the dependence graph. +class DefaultVLIWScheduler : public ScheduleDAGInstrs { +public: + DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, + MachineDominatorTree &MDT, bool IsPostRA); + // Schedule - Actual scheduling work. + void schedule(); +}; +} + +DefaultVLIWScheduler::DefaultVLIWScheduler( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + bool IsPostRA) : + ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) { + CanHandleTerminators = true; +} + +void DefaultVLIWScheduler::schedule() { + // Build the scheduling graph. + buildSchedGraph(0); +} + +// VLIWPacketizerList Ctor +VLIWPacketizerList::VLIWPacketizerList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + bool IsPostRA) : TM(MF.getTarget()), MF(MF) { + TII = TM.getInstrInfo(); + ResourceTracker = TII->CreateTargetScheduleState(&TM, 0); + VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); +} + +// VLIWPacketizerList Dtor +VLIWPacketizerList::~VLIWPacketizerList() { + if (VLIWScheduler) + delete VLIWScheduler; + + if (ResourceTracker) + delete ResourceTracker; +} + +// endPacket - End the current packet, bundle packet instructions and reset +// DFA state. +void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, + MachineInstr *MI) { + if (CurrentPacketMIs.size() > 1) { + MachineInstr *MIFirst = CurrentPacketMIs.front(); + finalizeBundle(*MBB, MIFirst, MI); + } + CurrentPacketMIs.clear(); + ResourceTracker->clearResources(); +} + +// PacketizeMIs - Bundle machine instructions into packets. +void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, + MachineBasicBlock::iterator BeginItr, + MachineBasicBlock::iterator EndItr) { + assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); + VLIWScheduler->startBlock(MBB); + VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size()); + VLIWScheduler->schedule(); + + // Generate MI -> SU map. 
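+  // Map every MI in the region back to the SUnit the scheduler built for it; + // the legality queries in the packetizer loop below are phrased in terms of + // SUnits.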
+ MIToSUnit.clear(); + for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) { + SUnit *SU = &VLIWScheduler->SUnits[i]; + MIToSUnit[SU->getInstr()] = SU; + } + + // The main packetizer loop. + for (; BeginItr != EndItr; ++BeginItr) { + MachineInstr *MI = BeginItr; + + this->initPacketizerState(); + + // End the current packet if needed. + if (this->isSoloInstruction(MI)) { + endPacket(MBB, MI); + continue; + } + + // Ignore pseudo instructions. + if (this->ignorePseudoInstruction(MI, MBB)) + continue; + + SUnit *SUI = MIToSUnit[MI]; + assert(SUI && "Missing SUnit Info!"); + + // Ask DFA if machine resource is available for MI. + bool ResourceAvail = ResourceTracker->canReserveResources(MI); + if (ResourceAvail) { + // Dependency check for MI with instructions in CurrentPacketMIs. + for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), + VE = CurrentPacketMIs.end(); VI != VE; ++VI) { + MachineInstr *MJ = *VI; + SUnit *SUJ = MIToSUnit[MJ]; + assert(SUJ && "Missing SUnit Info!"); + + // Is it legal to packetize SUI and SUJ together. + if (!this->isLegalToPacketizeTogether(SUI, SUJ)) { + // Allow packetization if dependency can be pruned. + if (!this->isLegalToPruneDependencies(SUI, SUJ)) { + // End the packet if dependency cannot be pruned. + endPacket(MBB, MI); + break; + } // !isLegalToPruneDependencies. + } // !isLegalToPacketizeTogether. + } // For all instructions in CurrentPacketMIs. + } else { + // End the packet if resource is not available. + endPacket(MBB, MI); + } + + // Add MI to the current packet. + BeginItr = this->addToPacket(MI); + } // For all instructions in BB. + + // End any packet left behind. + endPacket(MBB, EndItr); + VLIWScheduler->exitRegion(); + VLIWScheduler->finishBlock(); +} diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp new file mode 100644 index 0000000..a54217f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -0,0 +1,190 @@ +//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an extremely simple MachineInstr-level dead-code-elimination pass. 
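+// It walks each block bottom-up so that whole chains of dependent but +// ultimately dead instructions can be removed in a single pass.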
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegen-dce" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +STATISTIC(NumDeletes, "Number of dead instructions deleted"); + +namespace { + class DeadMachineInstructionElim : public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &MF); + + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + BitVector LivePhysRegs; + + public: + static char ID; // Pass identification, replacement for typeid + DeadMachineInstructionElim() : MachineFunctionPass(ID) { + initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry()); + } + + private: + bool isDead(const MachineInstr *MI) const; + }; +} +char DeadMachineInstructionElim::ID = 0; +char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID; + +INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", + "Remove dead machine instructions", false, false) + +bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { + // Technically speaking inline asm without side effects and no defs can still + // be deleted. But there is so much bad inline asm code out there, we should + // let them be. + if (MI->isInlineAsm()) + return false; + + // Don't delete instructions with side effects. + bool SawStore = false; + if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI()) + return false; + + // Examine each operand. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Don't delete live physreg defs, or any reserved register defs. + if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) + return false; + } else { + if (!MRI->use_nodbg_empty(Reg)) + // This def has a non-debug use. Don't delete the instruction! + return false; + } + } + } + + // If there are no defs with uses, the instruction is dead. + return true; +} + +bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { + bool AnyChanges = false; + MRI = &MF.getRegInfo(); + TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + + // Loop over all instructions in all blocks, from bottom to top, so that it's + // more likely that chains of dependent but ultimately dead instructions will + // be cleaned up. + for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend(); + I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + // Start out assuming that reserved registers are live out of this block. + LivePhysRegs = MRI->getReservedRegs(); + + // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not + // live across blocks, but some targets (x86) can have flags live out of a + // block. + for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(), + E = MBB->succ_end(); S != E; S++) + for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin(); + LI != (*S)->livein_end(); LI++) + LivePhysRegs.set(*LI); + + // Now scan the instructions and delete dead ones, tracking physreg + // liveness as we go. 
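+    // Defs make a physreg dead above the defining instruction and uses make + // it live again. DBG_VALUEs that refer to a deleted def are retargeted to + // undef rather than deleted, so debug information survives the DCE.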
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), + MIE = MBB->rend(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // If the instruction is dead, delete it! + if (isDead(MI)) { + DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I!=E; I=nextI) { + nextI = llvm::next(I); // I is invalidated by the setReg + MachineOperand& Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + if (UseMI==MI) + continue; + assert(Use.isDebug()); + UseMI->getOperand(0).setReg(0U); + } + } + AnyChanges = true; + MI->eraseFromParent(); + ++NumDeletes; + MIE = MBB->rend(); + // MII is now pointing to the next instruction to process, + // so don't increment it. + continue; + } + + // Record the physreg defs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + LivePhysRegs.reset(Reg); + // Check the subreg set, not the alias set, because a def + // of a super-register may still be partially live after + // this def. + for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR) + LivePhysRegs.reset(*SR); + } + } else if (MO.isRegMask()) { + // Register mask of preserved registers. All clobbers are dead. + LivePhysRegs.clearBitsNotInMask(MO.getRegMask()); + } + } + // Record the physreg uses, after the defs, in case a physreg is + // both defined and used in the same instruction. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LivePhysRegs.set(*AI); + } + } + } + + // We didn't delete the current instruction, so increment MII to + // the next one. + ++MII; + } + } + + LivePhysRegs.clear(); + return AnyChanges; +} diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp new file mode 100644 index 0000000..f27ec77 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -0,0 +1,183 @@ +//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass mulches exception handling code into a form adapted to code +// generation. Required if using dwarf exception handling. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfehprepare" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +using namespace llvm; + +STATISTIC(NumResumesLowered, "Number of resume calls lowered"); + +namespace { + class DwarfEHPrepare : public FunctionPass { + const TargetMachine *TM; + const TargetLoweringBase *TLI; + + // RewindFunction - _Unwind_Resume or the target equivalent. + Constant *RewindFunction; + + bool InsertUnwindResumeCalls(Function &Fn); + Value *GetExceptionObject(ResumeInst *RI); + + public: + static char ID; // Pass identification, replacement for typeid. + DwarfEHPrepare(const TargetMachine *tm) : + FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), + RewindFunction(0) { + initializeDominatorTreePass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &Fn); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + + const char *getPassName() const { + return "Exception handling preparation"; + } + }; +} // end anonymous namespace + +char DwarfEHPrepare::ID = 0; + +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { + return new DwarfEHPrepare(tm); +} + +/// GetExceptionObject - Return the exception object from the value passed into +/// the 'resume' instruction (typically an aggregate). Clean up any dead +/// instructions, including the 'resume' instruction. +Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { + Value *V = RI->getOperand(0); + Value *ExnObj = 0; + InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V); + LoadInst *SelLoad = 0; + InsertValueInst *ExcIVI = 0; + bool EraseIVIs = false; + + if (SelIVI) { + if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) { + ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0)); + if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) && + ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) { + ExnObj = ExcIVI->getOperand(1); + SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1)); + EraseIVIs = true; + } + } + } + + if (!ExnObj) + ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI); + + RI->eraseFromParent(); + + if (EraseIVIs) { + if (SelIVI->getNumUses() == 0) + SelIVI->eraseFromParent(); + if (ExcIVI->getNumUses() == 0) + ExcIVI->eraseFromParent(); + if (SelLoad && SelLoad->getNumUses() == 0) + SelLoad->eraseFromParent(); + } + + return ExnObj; +} + +/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present +/// into calls to the appropriate _Unwind_Resume function. +bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { + bool UsesNewEH = false; + SmallVector<ResumeInst*, 16> Resumes; + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) + Resumes.push_back(RI); + else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) + UsesNewEH = II->getUnwindDest()->isLandingPad(); + } + + if (Resumes.empty()) + return UsesNewEH; + + // Find the rewind function if we didn't already. 
+  if (!RewindFunction) {
+    LLVMContext &Ctx = Resumes[0]->getContext();
+    FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+                                          Type::getInt8PtrTy(Ctx), false);
+    const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+    RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy);
+  }
+
+  // Create the basic block where the _Unwind_Resume call will live.
+  LLVMContext &Ctx = Fn.getContext();
+  unsigned ResumesSize = Resumes.size();
+
+  if (ResumesSize == 1) {
+    // Instead of creating a new BB and PHI node, just append the call to
+    // _Unwind_Resume to the end of the single resume block.
+    ResumeInst *RI = Resumes.front();
+    BasicBlock *UnwindBB = RI->getParent();
+    Value *ExnObj = GetExceptionObject(RI);
+
+    // Call the _Unwind_Resume function.
+    CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
+    CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+    // We never expect _Unwind_Resume to return.
+    new UnreachableInst(Ctx, UnwindBB);
+    return true;
+  }
+
+  BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
+  PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize,
+                                "exn.obj", UnwindBB);
+
+  // Extract the exception object from the ResumeInst and add it to the PHI
+  // node that feeds the _Unwind_Resume call.
+  for (SmallVectorImpl<ResumeInst*>::iterator
+       I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
+    ResumeInst *RI = *I;
+    BasicBlock *Parent = RI->getParent();
+    BranchInst::Create(UnwindBB, Parent);
+
+    Value *ExnObj = GetExceptionObject(RI);
+    PN->addIncoming(ExnObj, Parent);
+
+    ++NumResumesLowered;
+  }
+
+  // Call the function.
+  CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
+  CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+  // We never expect _Unwind_Resume to return.
+  new UnreachableInst(Ctx, UnwindBB);
+  return true;
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+  bool Changed = InsertUnwindResumeCalls(Fn);
+  return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
new file mode 100644
index 0000000..5447df0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -0,0 +1,801 @@
+//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Early if-conversion is for out-of-order CPUs that don't have a lot of
+// predicable instructions. The goal is to eliminate conditional branches that
+// may mispredict.
+//
+// Instructions from both sides of the branch are executed speculatively, and a
+// cmov instruction selects the result.
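At the source level, the transformation looks roughly like this (conceptual C++ only; the pass itself operates on machine code and PHIs):

// What early if-conversion achieves, sketched in C++.
#include <cstdio>

int branchy(bool c, int a, int b) {
  int r;
  if (c)              // conditional branch that may mispredict
    r = a + 1;
  else
    r = b - 1;
  return r;
}

// After if-conversion: both sides are computed speculatively and a
// select/cmov picks the result, so there is no branch left to mispredict.
int converted(bool c, int a, int b) {
  int t = a + 1;      // speculated 'then' side
  int f = b - 1;      // speculated 'else' side
  return c ? t : f;   // becomes a select/cmov
}

int main() {
  std::printf("%d %d\n", branchy(true, 1, 2), converted(true, 1, 2)); // 2 2
}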
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "early-ifcvt" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +// Absolute maximum number of instructions allowed per speculated block. +// This bypasses all other heuristics, so it should be set fairly high. +static cl::opt<unsigned> +BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, + cl::desc("Maximum number of instructions per speculated block.")); + +// Stress testing mode - disable heuristics. +static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden, + cl::desc("Turn all knobs to 11")); + +STATISTIC(NumDiamondsSeen, "Number of diamonds"); +STATISTIC(NumDiamondsConv, "Number of diamonds converted"); +STATISTIC(NumTrianglesSeen, "Number of triangles"); +STATISTIC(NumTrianglesConv, "Number of triangles converted"); + +//===----------------------------------------------------------------------===// +// SSAIfConv +//===----------------------------------------------------------------------===// +// +// The SSAIfConv class performs if-conversion on SSA form machine code after +// determining if it is possible. The class contains no heuristics; external +// code should be used to determine when if-conversion is a good idea. +// +// SSAIfConv can convert both triangles and diamonds: +// +// Triangle: Head Diamond: Head +// | \ / \_ +// | \ / | +// | [TF]BB FBB TBB +// | / \ / +// | / \ / +// Tail Tail +// +// Instructions in the conditional blocks TBB and/or FBB are spliced into the +// Head block, and phis in the Tail block are converted to select instructions. +// +namespace { +class SSAIfConv { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + +public: + /// The block containing the conditional branch. + MachineBasicBlock *Head; + + /// The block containing phis after the if-then-else. + MachineBasicBlock *Tail; + + /// The 'true' conditional block as determined by AnalyzeBranch. + MachineBasicBlock *TBB; + + /// The 'false' conditional block as determined by AnalyzeBranch. + MachineBasicBlock *FBB; + + /// isTriangle - When there is no 'else' block, either TBB or FBB will be + /// equal to Tail. + bool isTriangle() const { return TBB == Tail || FBB == Tail; } + + /// Returns the Tail predecessor for the True side. + MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } + + /// Returns the Tail predecessor for the False side. + MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } + + /// Information about each phi in the Tail block. + struct PHIInfo { + MachineInstr *PHI; + unsigned TReg, FReg; + // Latencies from Cond+Branch, TReg, and FReg to DstReg. 
+    int CondCycles, TCycles, FCycles;
+
+    PHIInfo(MachineInstr *phi)
+      : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {}
+  };
+
+  SmallVector<PHIInfo, 8> PHIs;
+
+private:
+  /// The branch condition determined by AnalyzeBranch.
+  SmallVector<MachineOperand, 4> Cond;
+
+  /// Instructions in Head that define values used by the conditional blocks.
+  /// The hoisted instructions must be inserted after these instructions.
+  SmallPtrSet<MachineInstr*, 8> InsertAfter;
+
+  /// Register units clobbered by the conditional blocks.
+  BitVector ClobberedRegUnits;
+
+  // Scratch pad for findInsertionPoint.
+  SparseSet<unsigned> LiveRegUnits;
+
+  /// Insertion point in Head for speculatively executed instructions from TBB
+  /// and FBB.
+  MachineBasicBlock::iterator InsertionPoint;
+
+  /// Return true if all non-terminator instructions in MBB can be safely
+  /// speculated.
+  bool canSpeculateInstrs(MachineBasicBlock *MBB);
+
+  /// Find a valid insertion point in Head.
+  bool findInsertionPoint();
+
+  /// Replace PHI instructions in Tail with selects.
+  void replacePHIInstrs();
+
+  /// Insert selects and rewrite PHI operands to use them.
+  void rewritePHIOperands();
+
+public:
+  /// runOnMachineFunction - Initialize per-function data structures.
+  void runOnMachineFunction(MachineFunction &MF) {
+    TII = MF.getTarget().getInstrInfo();
+    TRI = MF.getTarget().getRegisterInfo();
+    MRI = &MF.getRegInfo();
+    LiveRegUnits.clear();
+    LiveRegUnits.setUniverse(TRI->getNumRegUnits());
+    ClobberedRegUnits.clear();
+    ClobberedRegUnits.resize(TRI->getNumRegUnits());
+  }
+
+  /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
+  /// initialize the internal state, and return true.
+  bool canConvertIf(MachineBasicBlock *MBB);
+
+  /// convertIf - If-convert the last block passed to canConvertIf(), assuming
+  /// it is possible. Add any erased blocks to RemovedBlocks.
+  void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
+};
+} // end anonymous namespace
+
+
+/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely
+/// be speculated. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
+  // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+  // get right.
+  if (!MBB->livein_empty()) {
+    DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+    return false;
+  }
+
+  unsigned InstrCount = 0;
+
+  // Check all instructions, except the terminators. It is assumed that
+  // terminators never have side effects or define any used register values.
+  for (MachineBasicBlock::iterator I = MBB->begin(),
+       E = MBB->getFirstTerminator(); I != E; ++I) {
+    if (I->isDebugValue())
+      continue;
+
+    if (++InstrCount > BlockInstrLimit && !Stress) {
+      DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+                   << BlockInstrLimit << " instructions.\n");
+      return false;
+    }
+
+    // There shouldn't normally be any phis in a single-predecessor block.
+    if (I->isPHI()) {
+      DEBUG(dbgs() << "Can't hoist: " << *I);
+      return false;
+    }
+
+    // Don't speculate loads. Note that it may be possible and desirable to
+    // speculate GOT or constant pool loads that are guaranteed not to trap,
+    // but we don't support that for now.
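The load restriction exists because a speculated instruction runs even when its guarding branch would have skipped it; for a load, that can mean faulting. A small C++ illustration of the hazard:

// Why loads are not speculated; illustrative only.
#include <cstdio>

int readOrZero(const int *p) {
  int r = 0;
  if (p)
    r = *p;   // safe only under the branch
  return r;
  // If-converting this to "int t = *p; return p ? t : 0;" would
  // dereference a null pointer whenever p == nullptr.
}

int main() {
  std::printf("%d\n", readOrZero(nullptr)); // 0, and no fault
}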
+    if (I->mayLoad()) {
+      DEBUG(dbgs() << "Won't speculate load: " << *I);
+      return false;
+    }
+
+    // We never speculate stores, so an AA pointer isn't necessary.
+    bool DontMoveAcrossStore = true;
+    if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) {
+      DEBUG(dbgs() << "Can't speculate: " << *I);
+      return false;
+    }
+
+    // Check for any dependencies on Head instructions.
+    for (MIOperands MO(I); MO.isValid(); ++MO) {
+      if (MO->isRegMask()) {
+        DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+        return false;
+      }
+      if (!MO->isReg())
+        continue;
+      unsigned Reg = MO->getReg();
+
+      // Remember clobbered regunits.
+      if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
+        for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+          ClobberedRegUnits.set(*Units);
+
+      if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+      MachineInstr *DefMI = MRI->getVRegDef(Reg);
+      if (!DefMI || DefMI->getParent() != Head)
+        continue;
+      if (InsertAfter.insert(DefMI))
+        DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
+      if (DefMI->isTerminator()) {
+        DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+
+/// Find an insertion point in Head for the speculated instructions. The
+/// insertion point must be:
+///
+/// 1. Before any terminators.
+/// 2. After any instructions in InsertAfter.
+/// 3. Not have any clobbered regunits live.
+///
+/// This function sets InsertionPoint and returns true when successful; it
+/// returns false if no valid insertion point could be found.
+///
+bool SSAIfConv::findInsertionPoint() {
+  // Keep track of live regunits before the current position.
+  // Only track RegUnits that are also in ClobberedRegUnits.
+  LiveRegUnits.clear();
+  SmallVector<unsigned, 8> Reads;
+  MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+  MachineBasicBlock::iterator I = Head->end();
+  MachineBasicBlock::iterator B = Head->begin();
+  while (I != B) {
+    --I;
+    // Some of the conditional code depends on I.
+    if (InsertAfter.count(I)) {
+      DEBUG(dbgs() << "Can't insert code after " << *I);
+      return false;
+    }
+
+    // Update live regunits.
+    for (MIOperands MO(I); MO.isValid(); ++MO) {
+      // We're ignoring regmask operands. That is conservatively correct.
+      if (!MO->isReg())
+        continue;
+      unsigned Reg = MO->getReg();
+      if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+        continue;
+      // I clobbers Reg, so it isn't live before I.
+      if (MO->isDef())
+        for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+          LiveRegUnits.erase(*Units);
+      // Unless I reads Reg.
+      if (MO->readsReg())
+        Reads.push_back(Reg);
+    }
+    // Anything read by I is live before I.
+    while (!Reads.empty())
+      for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
+           ++Units)
+        if (ClobberedRegUnits.test(*Units))
+          LiveRegUnits.insert(*Units);
+
+    // We can't insert before a terminator.
+    if (I != FirstTerm && I->isTerminator())
+      continue;
+
+    // Some of the clobbered registers are live before I, so this is not a
+    // valid insertion point.
+    if (!LiveRegUnits.empty()) {
+      DEBUG({
+        dbgs() << "Would clobber";
+        for (SparseSet<unsigned>::const_iterator
+             i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
+          dbgs() << ' ' << PrintRegUnit(*i, TRI);
+        dbgs() << " live before " << *I;
+      });
+      continue;
+    }
+
+    // This is a valid insertion point.
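The backward scan can be modeled standalone with toy register numbers in place of LLVM's register units (terminators and the InsertAfter set are left out of this sketch):

// Toy model of findInsertionPoint's backward walk; illustrative only.
#include <cstdio>
#include <set>
#include <vector>

struct Instr { std::vector<int> Defs, Uses; };

// Returns the index before which speculated code can be inserted, or -1.
int findPoint(const std::vector<Instr> &Head, const std::set<int> &Clobbered) {
  std::set<int> Live; // clobbered registers live above the current point
  for (int i = (int)Head.size(); i > 0; --i) {
    const Instr &I = Head[i - 1];
    for (int D : I.Defs)
      Live.erase(D);            // a def ends liveness above I...
    for (int U : I.Uses)
      if (Clobbered.count(U))
        Live.insert(U);         // ...a use of a clobbered reg extends it
    if (Live.empty())
      return i - 1;             // nothing clobbered is live here
  }
  return Live.empty() ? 0 : -1; // -1: a clobbered register is live-in
}

int main() {
  // Head: r5 = ...; use r1; use r5.  The speculated code clobbers r5, which
  // is live from its def down to its use, so the only valid point is above
  // the def, at index 0.
  std::vector<Instr> Head = { {{5}, {}}, {{}, {1}}, {{}, {5}} };
  std::printf("insert before index %d\n", findPoint(Head, {5})); // 0
}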
+ InsertionPoint = I; + DEBUG(dbgs() << "Can insert before " << *I); + return true; + } + DEBUG(dbgs() << "No legal insertion point found.\n"); + return false; +} + + + +/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is +/// a potential candidate for if-conversion. Fill out the internal state. +/// +bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { + Head = MBB; + TBB = FBB = Tail = 0; + + if (Head->succ_size() != 2) + return false; + MachineBasicBlock *Succ0 = Head->succ_begin()[0]; + MachineBasicBlock *Succ1 = Head->succ_begin()[1]; + + // Canonicalize so Succ0 has MBB as its single predecessor. + if (Succ0->pred_size() != 1) + std::swap(Succ0, Succ1); + + if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) + return false; + + Tail = Succ0->succ_begin()[0]; + + // This is not a triangle. + if (Tail != Succ1) { + // Check for a diamond. We won't deal with any critical edges. + if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || + Succ1->succ_begin()[0] != Tail) + return false; + DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber() + << " -> BB#" << Succ0->getNumber() + << "/BB#" << Succ1->getNumber() + << " -> BB#" << Tail->getNumber() << '\n'); + + // Live-in physregs are tricky to get right when speculating code. + if (!Tail->livein_empty()) { + DEBUG(dbgs() << "Tail has live-ins.\n"); + return false; + } + } else { + DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() + << " -> BB#" << Succ0->getNumber() + << " -> BB#" << Tail->getNumber() << '\n'); + } + + // This is a triangle or a diamond. + // If Tail doesn't have any phis, there must be side effects. + if (Tail->empty() || !Tail->front().isPHI()) { + DEBUG(dbgs() << "No phis in tail.\n"); + return false; + } + + // The branch we're looking to eliminate must be analyzable. + Cond.clear(); + if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) { + DEBUG(dbgs() << "Branch not analyzable.\n"); + return false; + } + + // This is weird, probably some sort of degenerate CFG. + if (!TBB) { + DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); + return false; + } + + // AnalyzeBranch doesn't set FBB on a fall-through branch. + // Make sure it is always set. + FBB = TBB == Succ0 ? Succ1 : Succ0; + + // Any phis in the tail block must be convertible to selects. + PHIs.clear(); + MachineBasicBlock *TPred = getTPred(); + MachineBasicBlock *FPred = getFPred(); + for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); + I != E && I->isPHI(); ++I) { + PHIs.push_back(&*I); + PHIInfo &PI = PHIs.back(); + // Find PHI operands corresponding to TPred and FPred. + for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) { + if (PI.PHI->getOperand(i+1).getMBB() == TPred) + PI.TReg = PI.PHI->getOperand(i).getReg(); + if (PI.PHI->getOperand(i+1).getMBB() == FPred) + PI.FReg = PI.PHI->getOperand(i).getReg(); + } + assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI"); + assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI"); + + // Get target information. + if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, + PI.CondCycles, PI.TCycles, PI.FCycles)) { + DEBUG(dbgs() << "Can't convert: " << *PI.PHI); + return false; + } + } + + // Check that the conditional instructions can be speculated. 
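The shape test in canConvertIf above reduces to successor/predecessor counting. A standalone sketch on a toy CFG (the PHI, branch-analysis, and live-in checks are omitted):

// Toy triangle/diamond recognizer; illustrative only.
#include <cstdio>
#include <utility>
#include <vector>

struct Block { std::vector<int> Succs, Preds; };

// Returns "triangle", "diamond", or nullptr for head block H.
const char *classify(const std::vector<Block> &CFG, int H) {
  if (CFG[H].Succs.size() != 2) return nullptr;
  int S0 = CFG[H].Succs[0], S1 = CFG[H].Succs[1];
  // Canonicalize so S0 has the head as its single predecessor.
  if (CFG[S0].Preds.size() != 1) std::swap(S0, S1);
  if (CFG[S0].Preds.size() != 1 || CFG[S0].Succs.size() != 1) return nullptr;
  int Tail = CFG[S0].Succs[0];
  if (Tail == S1) return "triangle"; // one side falls straight to Tail
  if (CFG[S1].Preds.size() == 1 && CFG[S1].Succs.size() == 1 &&
      CFG[S1].Succs[0] == Tail)
    return "diamond";                // both sides rejoin at Tail
  return nullptr;
}

int main() {
  // 0 -> {1,2}, 1 -> 3, 2 -> 3, tail 3: a diamond.
  std::vector<Block> CFG = {
    {{1, 2}, {}}, {{3}, {0}}, {{3}, {0}}, {{}, {1, 2}},
  };
  const char *K = classify(CFG, 0);
  std::printf("%s\n", K ? K : "neither"); // diamond
}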
+ InsertAfter.clear(); + ClobberedRegUnits.reset(); + if (TBB != Tail && !canSpeculateInstrs(TBB)) + return false; + if (FBB != Tail && !canSpeculateInstrs(FBB)) + return false; + + // Try to find a valid insertion point for the speculated instructions in the + // head basic block. + if (!findInsertionPoint()) + return false; + + if (isTriangle()) + ++NumTrianglesSeen; + else + ++NumDiamondsSeen; + return true; +} + +/// replacePHIInstrs - Completely replace PHI instructions with selects. +/// This is possible when the only Tail predecessors are the if-converted +/// blocks. +void SSAIfConv::replacePHIInstrs() { + assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + unsigned DstReg = PI.PHI->getOperand(0).getReg(); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + PI.PHI->eraseFromParent(); + PI.PHI = 0; + } +} + +/// rewritePHIOperands - When there are additional Tail predecessors, insert +/// select instructions in Head and rewrite PHI operands to use the selects. +/// Keep the PHI instructions in Tail to handle the other predecessors. +void SSAIfConv::rewritePHIOperands() { + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + + // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. + for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { + MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); + if (MBB == getTPred()) { + PI.PHI->getOperand(i-1).setMBB(Head); + PI.PHI->getOperand(i-2).setReg(DstReg); + } else if (MBB == getFPred()) { + PI.PHI->RemoveOperand(i-1); + PI.PHI->RemoveOperand(i-2); + } + } + DEBUG(dbgs() << " --> " << *PI.PHI); + } +} + +/// convertIf - Execute the if conversion after canConvertIf has determined the +/// feasibility. +/// +/// Any basic blocks erased will be added to RemovedBlocks. +/// +void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { + assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); + + // Update statistics. + if (isTriangle()) + ++NumTrianglesConv; + else + ++NumDiamondsConv; + + // Move all instructions into Head, except for the terminators. + if (TBB != Tail) + Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); + if (FBB != Tail) + Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); + + // Are there extra Tail predecessors? + bool ExtraPreds = Tail->pred_size() != 2; + if (ExtraPreds) + rewritePHIOperands(); + else + replacePHIInstrs(); + + // Fix up the CFG, temporarily leave Head without any successors. 
+  Head->removeSuccessor(TBB);
+  Head->removeSuccessor(FBB);
+  if (TBB != Tail)
+    TBB->removeSuccessor(Tail);
+  if (FBB != Tail)
+    FBB->removeSuccessor(Tail);
+
+  // Fix up Head's terminators.
+  // It should become a single branch or a fallthrough.
+  DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
+  TII->RemoveBranch(*Head);
+
+  // Erase the now empty conditional blocks. It is likely that Head can fall
+  // through to Tail, and we can join the two blocks.
+  if (TBB != Tail) {
+    RemovedBlocks.push_back(TBB);
+    TBB->eraseFromParent();
+  }
+  if (FBB != Tail) {
+    RemovedBlocks.push_back(FBB);
+    FBB->eraseFromParent();
+  }
+
+  assert(Head->succ_empty() && "Additional head successors?");
+  if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
+    // Splice Tail onto the end of Head.
+    DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
+                 << " into head BB#" << Head->getNumber() << '\n');
+    Head->splice(Head->end(), Tail,
+                 Tail->begin(), Tail->end());
+    Head->transferSuccessorsAndUpdatePHIs(Tail);
+    RemovedBlocks.push_back(Tail);
+    Tail->eraseFromParent();
+  } else {
+    // We need a branch to Tail; let code placement work it out later.
+    DEBUG(dbgs() << "Converting to unconditional branch.\n");
+    SmallVector<MachineOperand, 0> EmptyCond;
+    TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL);
+    Head->addSuccessor(Tail);
+  }
+  DEBUG(dbgs() << *Head);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyIfConverter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfConverter : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const MCSchedModel *SchedModel;
+  MachineRegisterInfo *MRI;
+  MachineDominatorTree *DomTree;
+  MachineLoopInfo *Loops;
+  MachineTraceMetrics *Traces;
+  MachineTraceMetrics::Ensemble *MinInstr;
+  SSAIfConv IfConv;
+
+public:
+  static char ID;
+  EarlyIfConverter() : MachineFunctionPass(ID) {}
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+  bool runOnMachineFunction(MachineFunction &MF);
+  const char *getPassName() const { return "Early If-Conversion"; }
+
+private:
+  bool tryConvertIf(MachineBasicBlock*);
+  void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
+  void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
+  void invalidateTraces();
+  bool shouldConvertIf();
+};
+} // end anonymous namespace
+
+char EarlyIfConverter::ID = 0;
+char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfConverter,
+                      "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(EarlyIfConverter,
+                    "early-ifcvt", "Early If Converter", false, false)
+
+void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineBranchProbabilityInfo>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addRequired<MachineTraceMetrics>();
+  AU.addPreserved<MachineTraceMetrics>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Update the dominator tree after if-conversion erased some blocks.
+void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) {
+  // convertIf can remove TBB and FBB, and Tail may have been merged into
+  // Head. TBB and FBB should not dominate any blocks.
+ // Tail children should be transferred to Head. + MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); + for (unsigned i = 0, e = Removed.size(); i != e; ++i) { + MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); + assert(Node != HeadNode && "Cannot erase the head node"); + while (Node->getNumChildren()) { + assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); + DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); + } + DomTree->eraseNode(Removed[i]); + } +} + +/// Update LoopInfo after if-conversion. +void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { + if (!Loops) + return; + // If-conversion doesn't change loop structure, and it doesn't mess with back + // edges, so updating LoopInfo is simply removing the dead blocks. + for (unsigned i = 0, e = Removed.size(); i != e; ++i) + Loops->removeBlock(Removed[i]); +} + +/// Invalidate MachineTraceMetrics before if-conversion. +void EarlyIfConverter::invalidateTraces() { + Traces->verifyAnalysis(); + Traces->invalidate(IfConv.Head); + Traces->invalidate(IfConv.Tail); + Traces->invalidate(IfConv.TBB); + Traces->invalidate(IfConv.FBB); + Traces->verifyAnalysis(); +} + +// Adjust cycles with downward saturation. +static unsigned adjCycles(unsigned Cyc, int Delta) { + if (Delta < 0 && Cyc + Delta > Cyc) + return 0; + return Cyc + Delta; +} + +/// Apply cost model and heuristics to the if-conversion in IfConv. +/// Return true if the conversion is a good idea. +/// +bool EarlyIfConverter::shouldConvertIf() { + // Stress testing mode disables all cost considerations. + if (Stress) + return true; + + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + + MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); + MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); + DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); + unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), + FBBTrace.getCriticalPath()); + + // Set a somewhat arbitrary limit on the critical path extension we accept. + unsigned CritLimit = SchedModel->MispredictPenalty/2; + + // If-conversion only makes sense when there is unexploited ILP. Compute the + // maximum-ILP resource length of the trace after if-conversion. Compare it + // to the shortest critical path. + SmallVector<const MachineBasicBlock*, 1> ExtraBlocks; + if (IfConv.TBB != IfConv.Tail) + ExtraBlocks.push_back(IfConv.TBB); + unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); + DEBUG(dbgs() << "Resource length " << ResLength + << ", minimal critical path " << MinCrit << '\n'); + if (ResLength > MinCrit + CritLimit) { + DEBUG(dbgs() << "Not enough available ILP.\n"); + return false; + } + + // Assume that the depth of the first head terminator will also be the depth + // of the select instruction inserted, as determined by the flag dependency. + // TBB / FBB data dependencies may delay the select even more. + MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); + unsigned BranchDepth = + HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth; + DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); + + // Look at all the tail phis, and compute the critical path extension caused + // by inserting select instructions. 
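A worked example of the arithmetic used below, with a standalone copy of adjCycles (the cycle counts are invented for illustration):

// Sample numbers through the cost model's saturating adjustment.
#include <cstdio>

// Same shape as adjCycles above: unsigned cycles adjusted by a signed
// delta, saturating at zero instead of wrapping.
static unsigned adjCycles(unsigned Cyc, int Delta) {
  if (Delta < 0 && Cyc + Delta > Cyc) // unsigned wrap: would go negative
    return 0;
  return Cyc + Delta;
}

int main() {
  unsigned BranchDepth = 7; // depth of the head block's terminator
  int CondCycles = 2;       // condition-to-select latency for this target
  unsigned CondDepth = adjCycles(BranchDepth, CondCycles); // 9

  // Saturation: 1 - 4 clamps to 0 rather than wrapping to a huge value.
  std::printf("clamped: %u\n", adjCycles(1, -4)); // 0

  // Reject when the select's new depth exceeds the PHI's depth plus its
  // slack by more than CritLimit (MispredictPenalty / 2).
  unsigned MaxDepth = 6, CritLimit = 7;
  if (CondDepth > MaxDepth && CondDepth - MaxDepth > CritLimit)
    std::printf("reject\n");
  else
    std::printf("accept\n"); // extension is 3 cycles, within the limit
}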
+ MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); + for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { + SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; + unsigned Slack = TailTrace.getInstrSlack(PI.PHI); + unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; + DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); + + // The condition is pulled into the critical path. + unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); + if (CondDepth > MaxDepth) { + unsigned Extra = CondDepth - MaxDepth; + DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The TBB value is pulled into the critical path. + unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles); + if (TDepth > MaxDepth) { + unsigned Extra = TDepth - MaxDepth; + DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The FBB value is pulled into the critical path. + unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles); + if (FDepth > MaxDepth) { + unsigned Extra = FDepth - MaxDepth; + DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + } + return true; +} + +/// Attempt repeated if-conversion on MBB, return true if successful. +/// +bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { + bool Changed = false; + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { + // If-convert MBB and update analyses. + invalidateTraces(); + SmallVector<MachineBasicBlock*, 4> RemovedBlocks; + IfConv.convertIf(RemovedBlocks); + Changed = true; + updateDomTree(RemovedBlocks); + updateLoops(RemovedBlocks); + } + return Changed; +} + +bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" + << "********** Function: " << MF.getName() << '\n'); + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + SchedModel = + MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel(); + MRI = &MF.getRegInfo(); + DomTree = &getAnalysis<MachineDominatorTree>(); + Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = 0; + + bool Changed = false; + IfConv.runOnMachineFunction(MF); + + // Visit blocks in dominator tree post-order. The post-order enables nested + // if-conversion in a single pass. The tryConvertIf() function may erase + // blocks, but only blocks dominated by the head block. This makes it safe to + // update the dominator tree while the post-order iterator is still active. + for (po_iterator<MachineDominatorTree*> + I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I) + if (tryConvertIf(I->getBlock())) + Changed = true; + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp new file mode 100644 index 0000000..3bb0465 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp @@ -0,0 +1,97 @@ +//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the EdgeBundles analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/GraphWriter.h" + +using namespace llvm; + +static cl::opt<bool> +ViewEdgeBundles("view-edge-bundles", cl::Hidden, + cl::desc("Pop up a window to show edge bundle graphs")); + +char EdgeBundles::ID = 0; + +INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges", + /* cfg = */true, /* analysis = */ true) + +char &llvm::EdgeBundlesID = EdgeBundles::ID; + +void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + EC.clear(); + EC.grow(2 * MF->getNumBlockIDs()); + + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { + const MachineBasicBlock &MBB = *I; + unsigned OutE = 2 * MBB.getNumber() + 1; + // Join the outgoing bundle with the ingoing bundles of all successors. + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) + EC.join(OutE, 2 * (*SI)->getNumber()); + } + EC.compress(); + if (ViewEdgeBundles) + view(); + + // Compute the reverse mapping. + Blocks.clear(); + Blocks.resize(getNumBundles()); + + for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) { + unsigned b0 = getBundle(i, 0); + unsigned b1 = getBundle(i, 1); + Blocks[b0].push_back(i); + if (b1 != b0) + Blocks[b1].push_back(i); + } + + return false; +} + +/// view - Visualize the annotated bipartite CFG with Graphviz. +void EdgeBundles::view() const { + ViewGraph(*this, "EdgeBundles"); +} + +/// Specialize WriteGraph, the standard implementation won't work. +raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, + bool ShortNames, + const Twine &Title) { + const MachineFunction *MF = G.getMachineFunction(); + + O << "digraph {\n"; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + unsigned BB = I->getNumber(); + O << "\t\"BB#" << BB << "\" [ shape=box ]\n" + << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n" + << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n'; + for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(), + SE = I->succ_end(); SI != SE; ++SI) + O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber() + << "\" [ color=lightgray ]\n"; + } + O << "}\n"; + return O; +} diff --git a/contrib/llvm/lib/CodeGen/ErlangGC.cpp b/contrib/llvm/lib/CodeGen/ErlangGC.cpp new file mode 100644 index 0000000..8a1e2d9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ErlangGC.cpp @@ -0,0 +1,81 @@ +//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Erlang/OTP runtime-compatible garbage collector +// (e.g. defines safe points, root initialization etc.) +// +// The frametable emitter is in ErlangGCPrinter.cpp. 
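The EdgeBundles computation above is, at heart, a union-find over 2*NumBlocks bundle IDs, with bundle 2*BB for the incoming side of block BB and 2*BB+1 for the outgoing side. A standalone sketch with a toy union-find:

// Toy model of edge-bundle numbering; illustrative only.
#include <cstdio>
#include <numeric>
#include <utility>
#include <vector>

struct UnionFind {
  std::vector<int> P;
  explicit UnionFind(int n) : P(n) { std::iota(P.begin(), P.end(), 0); }
  int find(int x) { return P[x] == x ? x : P[x] = find(P[x]); }
  void join(int a, int b) { P[find(a)] = find(b); }
};

int main() {
  // CFG: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3 (a diamond).
  const int NumBlocks = 4;
  std::vector<std::pair<int, int>> Edges = {{0,1},{0,2},{1,3},{2,3}};
  UnionFind EC(2 * NumBlocks);
  for (auto &E : Edges)
    EC.join(2 * E.first + 1, 2 * E.second); // out(src) joins in(dst)
  // The two edges leaving block 0 share one bundle, and the two edges
  // entering block 3 share another: a spill placed on one edge of a bundle
  // interacts with every other edge in that bundle.
  for (auto &E : Edges)
    std::printf("edge %d->%d in bundle %d\n",
                E.first, E.second, EC.find(2 * E.first + 1));
}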
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+  class ErlangGC : public GCStrategy {
+    MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MI,
+                          DebugLoc DL) const;
+  public:
+    ErlangGC();
+    bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF);
+  };
+
+}
+
+static GCRegistry::Add<ErlangGC>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGC() { }
+
+ErlangGC::ErlangGC() {
+  InitRoots = false;
+  NeededSafePoints = 1 << GC::PostCall;
+  UsesMetadata = true;
+  CustomRoots = false;
+  CustomSafePoints = true;
+}
+
+MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI,
+                                DebugLoc DL) const {
+  const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+  MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+  BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+  return Label;
+}
+
+bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
+  for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
+       ++BBI)
+    for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+         MI != ME; ++MI)
+
+      if (MI->getDesc().isCall()) {
+
+        // Do not treat tail call sites as safe points.
+        if (MI->getDesc().isTerminator())
+          continue;
+
+        /* Code copied from VisitCallPoint(...) */
+        MachineBasicBlock::iterator RAI = MI; ++RAI;
+        MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc());
+        FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc());
+      }
+
+  return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
new file mode 100644
index 0000000..9b0e76f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -0,0 +1,725 @@
+//===- ExecutionDepsFix.cpp - Fix execution dependency issues ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the execution dependency fix pass.
+//
+// Some X86 SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer CPUs there is extra latency
+// transferring data between integer and floating point domains. ARM cores
+// have similar issues when they are configured with both VFP and NEON
+// pipelines.
+//
+// This pass changes the variant instructions to minimize domain crossings.
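To make the crossing cost concrete, here is a toy latency model; the one-cycle bypass penalty is invented for illustration, and real penalties vary by core and are encoded by the target:

// Hypothetical domain-crossing cost model; illustrative only.
#include <cstdio>

enum Domain { Int = 0, FP = 1 };

struct Value { Domain Dom; };

// One extra cycle whenever a value crosses domains (made-up numbers).
int consumeCost(const Value &V, Domain InstrDomain) {
  return 1 + (V.Dom != InstrDomain ? 1 : 0);
}

int main() {
  Value V{Int}; // produced by an integer-domain op (e.g. PXOR)
  std::printf("same domain:  %d cycles\n", consumeCost(V, Int)); // 1
  std::printf("cross domain: %d cycles\n", consumeCost(V, FP));  // 2
}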
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "execution-fix"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
+namespace {
+struct DomainValue {
+  // Basic reference counting.
+  unsigned Refs;
+
+  // Bitmask of available domains. For an open DomainValue, it is the still
+  // possible domains for collapsing. For a collapsed DomainValue it is the
+  // domains where the register is available for free.
+  unsigned AvailableDomains;
+
+  // Pointer to the next DomainValue in a chain. When two DomainValues are
+  // merged, Victim.Next is set to point to Victor, so old DomainValue
+  // references can be updated by following the chain.
+  DomainValue *Next;
+
+  // Twiddleable instructions using or defining these registers.
+  SmallVector<MachineInstr*, 8> Instrs;
+
+  // A collapsed DomainValue has no instructions to twiddle - it simply keeps
+  // track of the domains where the registers are already available.
+  bool isCollapsed() const { return Instrs.empty(); }
+
+  // Is domain available?
+  bool hasDomain(unsigned domain) const {
+    return AvailableDomains & (1u << domain);
+  }
+
+  // Mark domain as available.
+  void addDomain(unsigned domain) {
+    AvailableDomains |= 1u << domain;
+  }
+
+  // Restrict to a single domain available.
+  void setSingleDomain(unsigned domain) {
+    AvailableDomains = 1u << domain;
+  }
+
+  // Return bitmask of domains that are available and in mask.
+  unsigned getCommonDomains(unsigned mask) const {
+    return AvailableDomains & mask;
+  }
+
+  // First domain available.
+  unsigned getFirstDomain() const {
+    return CountTrailingZeros_32(AvailableDomains);
+  }
+
+  DomainValue() : Refs(0) { clear(); }
+
+  // Clear this DomainValue and point to next which has all its data.
+  void clear() {
+    AvailableDomains = 0;
+    Next = 0;
+    Instrs.clear();
+  }
+};
+}
+
+namespace {
+/// LiveReg - Information about a live register.
+struct LiveReg {
+  /// Value currently in this register, or NULL when no value is being tracked.
+  /// This counts as a DomainValue reference.
+  DomainValue *Value;
+
+  /// Instruction that defined this register, relative to the beginning of the
+  /// current basic block. When a LiveReg is used to represent a live-out
+  /// register, this value is relative to the end of the basic block, so it
+  /// will be a negative number.
+  int Def;
+};
+} // anonymous namespace
+
+namespace {
+class ExeDepsFix : public MachineFunctionPass {
+  static char ID;
+  SpecificBumpPtrAllocator<DomainValue> Allocator;
+  SmallVector<DomainValue*,16> Avail;
+
+  const TargetRegisterClass *const RC;
+  MachineFunction *MF;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  std::vector<int> AliasMap;
+  const unsigned NumRegs;
+  LiveReg *LiveRegs;
+  typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
+  LiveOutMap LiveOuts;
+
+  /// Current instruction number.
+  /// The first instruction in each basic block is 0.
+  int CurInstr;
+
+  /// True when the current block has a predecessor that hasn't been visited
+  /// yet.
+  bool SeenUnknownBackEdge;
+
+public:
+  ExeDepsFix(const TargetRegisterClass *rc)
+    : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  virtual const char *getPassName() const {
+    return "Execution dependency fix";
+  }
+
+private:
+  // Register mapping.
+  int regIndex(unsigned Reg);
+
+  // DomainValue allocation.
+  DomainValue *alloc(int domain = -1);
+  DomainValue *retain(DomainValue *DV) {
+    if (DV) ++DV->Refs;
+    return DV;
+  }
+  void release(DomainValue*);
+  DomainValue *resolve(DomainValue*&);
+
+  // LiveRegs manipulations.
+  void setLiveReg(int rx, DomainValue *DV);
+  void kill(int rx);
+  void force(int rx, unsigned domain);
+  void collapse(DomainValue *dv, unsigned domain);
+  bool merge(DomainValue *A, DomainValue *B);
+
+  void enterBasicBlock(MachineBasicBlock*);
+  void leaveBasicBlock(MachineBasicBlock*);
+  void visitInstr(MachineInstr*);
+  void processDefs(MachineInstr*, bool Kill);
+  void visitSoftInstr(MachineInstr*, unsigned mask);
+  void visitHardInstr(MachineInstr*, unsigned domain);
+};
+}
+
+char ExeDepsFix::ID = 0;
+
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
+int ExeDepsFix::regIndex(unsigned Reg) {
+  assert(Reg < AliasMap.size() && "Invalid register");
+  return AliasMap[Reg];
+}
+
+DomainValue *ExeDepsFix::alloc(int domain) {
+  DomainValue *dv = Avail.empty() ?
+                      new(Allocator.Allocate()) DomainValue :
+                      Avail.pop_back_val();
+  if (domain >= 0)
+    dv->addDomain(domain);
+  assert(dv->Refs == 0 && "Reference count wasn't cleared");
+  assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
+  return dv;
+}
+
+/// release - Release a reference to DV. When the last reference is released,
+/// collapse if needed.
+void ExeDepsFix::release(DomainValue *DV) {
+  while (DV) {
+    assert(DV->Refs && "Bad DomainValue");
+    if (--DV->Refs)
+      return;
+
+    // There are no more DV references. Collapse any contained instructions.
+    if (DV->AvailableDomains && !DV->isCollapsed())
+      collapse(DV, DV->getFirstDomain());
+
+    DomainValue *Next = DV->Next;
+    DV->clear();
+    Avail.push_back(DV);
+    // Also release the next DomainValue in the chain.
+    DV = Next;
+  }
+}
+
+/// resolve - Follow the chain of dead DomainValues until a live DomainValue is
+/// reached. Update the referenced pointer when necessary.
+DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
+  DomainValue *DV = DVRef;
+  if (!DV || !DV->Next)
+    return DV;
+
+  // DV has a chain. Find the end.
+  do DV = DV->Next;
+  while (DV->Next);
+
+  // Update DVRef to point to DV.
+  retain(DV);
+  release(DVRef);
+  DVRef = DV;
+  return DV;
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
+  assert(unsigned(rx) < NumRegs && "Invalid index");
+  assert(LiveRegs && "Must enter basic block first.");
+
+  if (LiveRegs[rx].Value == dv)
+    return;
+  if (LiveRegs[rx].Value)
+    release(LiveRegs[rx].Value);
+  LiveRegs[rx].Value = retain(dv);
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void ExeDepsFix::kill(int rx) {
+  assert(unsigned(rx) < NumRegs && "Invalid index");
+  assert(LiveRegs && "Must enter basic block first.");
+  if (!LiveRegs[rx].Value)
+    return;
+
+  release(LiveRegs[rx].Value);
+  LiveRegs[rx].Value = 0;
+}
+
+/// Force register rx into domain.
+void ExeDepsFix::force(int rx, unsigned domain) {
+  assert(unsigned(rx) < NumRegs && "Invalid index");
+  assert(LiveRegs && "Must enter basic block first.");
+  if (DomainValue *dv = LiveRegs[rx].Value) {
+    if (dv->isCollapsed())
+      dv->addDomain(domain);
+    else if (dv->hasDomain(domain))
+      collapse(dv, domain);
+    else {
+      // This is an incompatible open DomainValue. Collapse it to whatever and
+      // force the new value into domain. This costs a domain crossing.
+      collapse(dv, dv->getFirstDomain());
+      assert(LiveRegs[rx].Value && "Not live after collapse?");
+      LiveRegs[rx].Value->addDomain(domain);
+    }
+  } else {
+    // Set up basic collapsed DomainValue.
+    setLiveReg(rx, alloc(domain));
+  }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
+  assert(dv->hasDomain(domain) && "Cannot collapse");
+
+  // Collapse all the instructions.
+  while (!dv->Instrs.empty())
+    TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain);
+  dv->setSingleDomain(domain);
+
+  // If there are multiple users, give them new, unique DomainValues.
+  if (LiveRegs && dv->Refs > 1)
+    for (unsigned rx = 0; rx != NumRegs; ++rx)
+      if (LiveRegs[rx].Value == dv)
+        setLiveReg(rx, alloc(domain));
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
+bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
+  assert(!A->isCollapsed() && "Cannot merge into collapsed");
+  assert(!B->isCollapsed() && "Cannot merge from collapsed");
+  if (A == B)
+    return true;
+  // Restrict to the domains that A and B have in common.
+  unsigned common = A->getCommonDomains(B->AvailableDomains);
+  if (!common)
+    return false;
+  A->AvailableDomains = common;
+  A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+
+  // Clear the old DomainValue so we won't try to swizzle instructions twice.
+  B->clear();
+  // All uses of B are redirected to A.
+  B->Next = retain(A);
+
+  for (unsigned rx = 0; rx != NumRegs; ++rx)
+    if (LiveRegs[rx].Value == B)
+      setLiveReg(rx, A);
+  return true;
+}
+
+// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
+void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
+  // Detect back-edges from predecessors we haven't processed yet.
+  SeenUnknownBackEdge = false;
+
+  // Reset instruction counter in each basic block.
+  CurInstr = 0;
+
+  // Set up LiveRegs to represent registers entering MBB.
+  if (!LiveRegs)
+    LiveRegs = new LiveReg[NumRegs];
+
+  // Default values are 'nothing happened a long time ago'.
+  for (unsigned rx = 0; rx != NumRegs; ++rx) {
+    LiveRegs[rx].Value = 0;
+    LiveRegs[rx].Def = -(1 << 20);
+  }
+
+  // This is the entry block.
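The retain/release/resolve machinery above behaves like a reference-counted union-find over merged values. A minimal standalone model (toy refcounts; the real pass also recycles values and collapses instructions on the last release):

// Toy model of DomainValue chains and resolve(); illustrative only.
#include <cassert>
#include <cstdio>

struct DV {
  int Refs = 0;
  DV *Next = nullptr; // set when this value has been merged into another
};

DV *retain(DV *V) { if (V) ++V->Refs; return V; }
void release(DV *V) { if (V) --V->Refs; }

// Follow the chain of merged-away values to the live one and retarget Ref.
DV *resolve(DV *&Ref) {
  DV *V = Ref;
  if (!V || !V->Next)
    return V;
  while (V->Next)
    V = V->Next;
  retain(V);    // Ref now counts against the live value...
  release(Ref); // ...instead of the dead chain head
  Ref = V;
  return V;
}

int main() {
  DV A, B, C;
  C.Next = retain(&B);  // C was merged into B first
  B.Next = retain(&A);  // then B was merged into A
  DV *Ref = retain(&C); // a stale reference into the chain
  resolve(Ref);
  assert(Ref == &A);
  std::printf("A.Refs = %d\n", A.Refs); // 2: from B.Next and from Ref
}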
+ if (MBB->pred_empty()) { + for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), + e = MBB->livein_end(); i != e; ++i) { + int rx = regIndex(*i); + if (rx < 0) + continue; + // Treat function live-ins as if they were defined just before the first + // instruction. Usually, function arguments are set up immediately + // before the call. + LiveRegs[rx].Def = -1; + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), + pe = MBB->pred_end(); pi != pe; ++pi) { + LiveOutMap::const_iterator fi = LiveOuts.find(*pi); + if (fi == LiveOuts.end()) { + SeenUnknownBackEdge = true; + continue; + } + assert(fi->second && "Can't have NULL entries"); + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + // Use the most recent predecessor def for each register. + LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def); + + DomainValue *pdv = resolve(fi->second[rx].Value); + if (!pdv) + continue; + if (!LiveRegs[rx].Value) { + setLiveReg(rx, pdv); + continue; + } + + // We have a live DomainValue from more than one predecessor. + if (LiveRegs[rx].Value->isCollapsed()) { + // We are already collapsed, but predecessor is not. Force him. + unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) + collapse(pdv, Domain); + continue; + } + + // Currently open, merge in predecessor. + if (!pdv->isCollapsed()) + merge(LiveRegs[rx].Value, pdv); + else + force(rx, pdv->getFirstDomain()); + } + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n")); +} + +void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { + assert(LiveRegs && "Must enter basic block first."); + // Save live registers at end of MBB - used by enterBasicBlock(). + // Also use LiveOuts as a visited set to detect back-edges. + bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second; + + if (First) { + // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to + // the end of this block instead of the beginning. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + LiveRegs[i].Def -= CurInstr; + } else { + // Insertion failed, this must be the second pass. + // Release all the DomainValues instead of keeping them. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + release(LiveRegs[i].Value); + delete[] LiveRegs; + } + LiveRegs = 0; +} + +void ExeDepsFix::visitInstr(MachineInstr *MI) { + if (MI->isDebugValue()) + return; + + // Update instructions with explicit execution domains. + std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI); + if (DomP.first) { + if (DomP.second) + visitSoftInstr(MI, DomP.second); + else + visitHardInstr(MI, DomP.first); + } + + // Process defs to track register ages, and kill values clobbered by generic + // instructions. + processDefs(MI, !DomP.first); +} + +// Update def-ages for registers defined by MI. +// If Kill is set, also kill off DomainValues clobbered by the defs. +void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { + assert(!MI->isDebugValue() && "Won't process debug values"); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? 
MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isImplicit()) + break; + if (MO.isUse()) + continue; + int rx = regIndex(MO.getReg()); + if (rx < 0) + continue; + + // This instruction explicitly defines rx. + DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr + << '\t' << *MI); + + // How many instructions since rx was last written? + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + LiveRegs[rx].Def = CurInstr; + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + + // Verify clearance before partial register updates. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (!Pref) + continue; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + TII->breakPartialRegDependency(MI, i, TRI); + continue; + } + + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK.\n"); + continue; + } + + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); + } + + ++CurInstr; +} + +// A hard instruction only works in one domain. All input registers will be +// forced into that domain. +void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { + // Collapse all uses. + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = regIndex(mo.getReg()); + if (rx < 0) continue; + force(rx, domain); + } + + // Kill all defs and force them. + for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = regIndex(mo.getReg()); + if (rx < 0) continue; + kill(rx); + force(rx, domain); + } +} + +// A soft instruction can be changed to work in other domains given by mask. +void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { + // Bitmask of available domains for this instruction after taking collapsed + // operands into account. + unsigned available = mask; + + // Scan the explicit use operands for incoming domains. + SmallVector<int, 4> used; + if (LiveRegs) + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = regIndex(mo.getReg()); + if (rx < 0) continue; + if (DomainValue *dv = LiveRegs[rx].Value) { + // Bitmask of domains that dv and available have in common. + unsigned common = dv->getCommonDomains(available); + // Is it possible to use this collapsed register for free? + if (dv->isCollapsed()) { + // Restrict available domains to the ones in common with the operand. + // If there are no common domains, we must pay the cross-domain + // penalty for this operand. + if (common) available = common; + } else if (common) + // Open DomainValue is compatible, save it for merging. + used.push_back(rx); + else + // Open DomainValue is not compatible with instruction. It is useless + // now. + kill(rx); + } + } + + // If the collapsed operands force a single domain, propagate the collapse. 
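+ // A power-of-2 mask has exactly one bit set, meaning exactly one domain
+ // remains available.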
+ if (isPowerOf2_32(available)) {
+ unsigned domain = CountTrailingZeros_32(available);
+ TII->setExecutionDomain(mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match available, and build a list of
+ // incoming DomainValues that we want to merge.
+ SmallVector<LiveReg, 4> Regs;
+ for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ const LiveReg &LR = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!LR.Value->getCommonDomains(available)) {
+ kill(rx);
+ continue;
+ }
+ // Sorted insertion.
+ bool Inserted = false;
+ for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end();
+ i != e && !Inserted; ++i) {
+ if (LR.Def < i->Def) {
+ Inserted = true;
+ Regs.insert(i, LR);
+ }
+ }
+ if (!Inserted)
+ Regs.push_back(LR);
+ }
+
+ // Regs is now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = 0;
+ while (!Regs.empty()) {
+ if (!dv) {
+ dv = Regs.pop_back_val().Value;
+ // Force the first dv to match the current instruction.
+ dv->AvailableDomains = dv->getCommonDomains(available);
+ assert(dv->AvailableDomains && "Domain should have been filtered");
+ continue;
+ }
+
+ DomainValue *Latest = Regs.pop_back_val().Value;
+ // Skip already merged values.
+ if (Latest == dv || Latest->Next)
+ continue;
+ if (merge(dv, Latest))
+ continue;
+
+ // If Latest didn't merge, it is useless now. Kill all registers using it.
+ for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
+ if (LiveRegs[*i].Value == Latest)
+ kill(*i);
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv) {
+ dv = alloc();
+ dv->AvailableDomains = available;
+ }
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv. We must iterate through
+ // all the operands, including imp-def ones.
+ for (MachineInstr::mop_iterator ii = mi->operands_begin(),
+ ee = mi->operands_end();
+ ii != ee; ++ii) {
+ MachineOperand &mo = *ii;
+ if (!mo.isReg()) continue;
+ int rx = regIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
+ kill(rx);
+ setLiveReg(rx, dv);
+ }
+ }
+}
+
+bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = MF->getTarget().getInstrInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+ LiveRegs = 0;
+ assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+
+ DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
+ << RC->getName() << " **********\n");
+
+ // If no relevant registers are used in the function, we can skip it
+ // completely.
+ bool anyregs = false;
+ for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ anyregs = true;
+ break;
+ }
+ if (!anyregs) return false;
+
+ // Initialize the AliasMap on the first use.
+ if (AliasMap.empty()) {
+ // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC,
+ // or -1.
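+ // All aliases of RC's i-th register (including the register itself and
+ // its sub- and super-registers) map to index i.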
+ AliasMap.resize(TRI->getNumRegs(), -1); + for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) + for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); + AI.isValid(); ++AI) + AliasMap[*AI] = i; + } + + MachineBasicBlock *Entry = MF->begin(); + ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); + SmallVector<MachineBasicBlock*, 16> Loops; + for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + MachineBasicBlock *MBB = *MBBI; + enterBasicBlock(MBB); + if (SeenUnknownBackEdge) + Loops.push_back(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) + visitInstr(I); + leaveBasicBlock(MBB); + } + + // Visit all the loop blocks again in order to merge DomainValues from + // back-edges. + for (unsigned i = 0, e = Loops.size(); i != e; ++i) { + MachineBasicBlock *MBB = Loops[i]; + enterBasicBlock(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) + if (!I->isDebugValue()) + processDefs(I, false); + leaveBasicBlock(MBB); + } + + // Clear the LiveOuts vectors and collapse any remaining DomainValues. + for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); + if (FI == LiveOuts.end() || !FI->second) + continue; + for (unsigned i = 0, e = NumRegs; i != e; ++i) + if (FI->second[i].Value) + release(FI->second[i].Value); + delete[] FI->second; + } + LiveOuts.clear(); + Avail.clear(); + Allocator.DestroyAll(); + + return false; +} + +FunctionPass * +llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) { + return new ExeDepsFix(RC); +} diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp new file mode 100644 index 0000000..b2b6882 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp @@ -0,0 +1,74 @@ +//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Expand Pseudo-instructions produced by ISel. These are usually to allow +// the expansion to contain control flow, such as a conditional move +// implemented with a conditional branch and a phi, or an atomic operation +// implemented with a loop. 
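+// Targets mark such instructions as needing custom insertion; this pass
+// expands them via TargetLowering::EmitInstrWithCustomInserter().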
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "expand-isel-pseudos" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +namespace { + class ExpandISelPseudos : public MachineFunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandISelPseudos() : MachineFunctionPass(ID) {} + + private: + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} // end anonymous namespace + +char ExpandISelPseudos::ID = 0; +char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID; +INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", + "Expand ISel Pseudo-instructions", false, false) + +bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + const TargetLowering *TLI = MF.getTarget().getTargetLowering(); + + // Iterate through each instruction in the function, looking for pseudos. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + MBBI != MBBE; ) { + MachineInstr *MI = MBBI++; + + // If MI is a pseudo, expand it. + if (MI->usesCustomInsertionHook()) { + Changed = true; + MachineBasicBlock *NewMBB = + TLI->EmitInstrWithCustomInserter(MI, MBB); + // The expansion may involve new basic blocks. + if (NewMBB != MBB) { + MBB = NewMBB; + I = NewMBB; + MBBI = NewMBB->begin(); + MBBE = NewMBB->end(); + } + } + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp new file mode 100644 index 0000000..1611db8 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -0,0 +1,225 @@ +//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pass that expands COPY and SUBREG_TO_REG pseudo +// instructions after register allocation. 
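+// COPY becomes a target-specific copyPhysReg() sequence (or a KILL when no
+// real copy is needed), and SUBREG_TO_REG becomes a copy into the
+// destination's sub-register.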
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postrapseudos"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+namespace {
+struct ExpandPostRA : public MachineFunctionPass {
+private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandPostRA() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&);
+
+private:
+ bool LowerSubregToReg(MachineInstr *MI);
+ bool LowerCopy(MachineInstr *MI);
+
+ void TransferImplicitDefs(MachineInstr *MI);
+};
+} // end anonymous namespace
+
+char ExpandPostRA::ID = 0;
+char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
+
+INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
+ "Post-RA pseudo instruction expansion pass", false, false)
+
+/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit-def
+/// operands from MI to the replacement instruction.
+void
+ExpandPostRA::TransferImplicitDefs(MachineInstr *MI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+ continue;
+ CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
+ }
+}
+
+bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned InsReg = MI->getOperand(2).getReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for subreg_to_reg");
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Insert destination must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (MI->allDefsAreDead()) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+ return true;
+ }
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identity copy instruction.
+ // Watch out for a case like this:
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+ // We must leave %RAX live.
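+ // A KILL marker keeps the register live without emitting any actual code.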
+ if (DstReg != InsReg) { + MI->setDesc(TII->get(TargetOpcode::KILL)); + MI->RemoveOperand(3); // SubIdx + MI->RemoveOperand(1); // Imm + DEBUG(dbgs() << "subreg: replace by: " << *MI); + return true; + } + DEBUG(dbgs() << "subreg: eliminated!"); + } else { + TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, + MI->getOperand(2).isKill()); + + // Implicitly define DstReg for subsequent uses. + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + CopyMI->addRegisterDefined(DstReg); + DEBUG(dbgs() << "subreg: " << *CopyMI); + } + + DEBUG(dbgs() << '\n'); + MBB->erase(MI); + return true; +} + +bool ExpandPostRA::LowerCopy(MachineInstr *MI) { + + if (MI->allDefsAreDead()) { + DEBUG(dbgs() << "dead copy: " << *MI); + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "replaced by: " << *MI); + return true; + } + + MachineOperand &DstMO = MI->getOperand(0); + MachineOperand &SrcMO = MI->getOperand(1); + + if (SrcMO.getReg() == DstMO.getReg()) { + DEBUG(dbgs() << "identity copy: " << *MI); + // No need to insert an identity copy instruction, but replace with a KILL + // if liveness is changed. + if (SrcMO.isUndef() || MI->getNumOperands() > 2) { + // We must make sure the super-register gets killed. Replace the + // instruction with KILL. + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "replaced by: " << *MI); + return true; + } + // Vanilla identity copy. + MI->eraseFromParent(); + return true; + } + + DEBUG(dbgs() << "real copy: " << *MI); + TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), + DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill()); + + if (MI->getNumOperands() > 2) + TransferImplicitDefs(MI); + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + dbgs() << "replaced by: " << *(--dMI); + }); + MI->eraseFromParent(); + return true; +} + +/// runOnMachineFunction - Reduce subregister inserts and extracts to register +/// copies. +/// +bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "Machine Function\n" + << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" + << "********** Function: " << MF.getName() << '\n'); + TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + + bool MadeChange = false; + + for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + mbbi != mbbe; ++mbbi) { + for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + mi != me;) { + MachineInstr *MI = mi; + // Advance iterator here because MI may be erased. + ++mi; + + // Only expand pseudos. + if (!MI->isPseudo()) + continue; + + // Give targets a chance to expand even standard pseudos. + if (TII->expandPostRAPseudo(MI)) { + MadeChange = true; + continue; + } + + // Expand standard pseudos. 
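+ // DBG_VALUE is left untouched; sub-register pseudos must already have been
+ // eliminated before this pass.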
+ switch (MI->getOpcode()) { + case TargetOpcode::SUBREG_TO_REG: + MadeChange |= LowerSubregToReg(MI); + break; + case TargetOpcode::COPY: + MadeChange |= LowerCopy(MI); + break; + case TargetOpcode::DBG_VALUE: + continue; + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::EXTRACT_SUBREG: + llvm_unreachable("Sub-register pseudos should have been eliminated."); + } + } + } + + return MadeChange; +} diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp new file mode 100644 index 0000000..ef5247c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp @@ -0,0 +1,178 @@ +//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the GCFunctionInfo class and GCModuleInfo pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + + class Printer : public FunctionPass { + static char ID; + raw_ostream &OS; + + public: + explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} + + + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnFunction(Function &F); + bool doFinalization(Module &M); + }; + +} + +INITIALIZE_PASS(GCModuleInfo, "collector-metadata", + "Create Garbage Collector Module Metadata", false, false) + +// ----------------------------------------------------------------------------- + +GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) + : F(F), S(S), FrameSize(~0LL) {} + +GCFunctionInfo::~GCFunctionInfo() {} + +// ----------------------------------------------------------------------------- + +char GCModuleInfo::ID = 0; + +GCModuleInfo::GCModuleInfo() + : ImmutablePass(ID) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); +} + +GCModuleInfo::~GCModuleInfo() { + clear(); +} + +GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, + const std::string &Name) { + strategy_map_type::iterator NMI = StrategyMap.find(Name); + if (NMI != StrategyMap.end()) + return NMI->getValue(); + + for (GCRegistry::iterator I = GCRegistry::begin(), + E = GCRegistry::end(); I != E; ++I) { + if (Name == I->getName()) { + GCStrategy *S = I->instantiate(); + S->M = M; + S->Name = Name; + StrategyMap.GetOrCreateValue(Name).setValue(S); + StrategyList.push_back(S); + return S; + } + } + + dbgs() << "unsupported GC: " << Name << "\n"; + llvm_unreachable(0); +} + +GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { + assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!"); + assert(F.hasGC()); + + finfo_map_type::iterator I = FInfoMap.find(&F); + if (I != FInfoMap.end()) + return *I->second; + + GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC()); + GCFunctionInfo *GFI = S->insertFunctionInfo(F); + FInfoMap[&F] = GFI; + return *GFI; +} + +void GCModuleInfo::clear() { + FInfoMap.clear(); + StrategyMap.clear(); + + for 
(iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+ StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
+ return new Printer(OS);
+}
+
+
+const char *Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+ switch (Kind) {
+ case GC::Loop: return "loop";
+ case GC::Return: return "return";
+ case GC::PreCall: return "pre-call";
+ case GC::PostCall: return "post-call";
+ }
+ llvm_unreachable("Invalid point kind");
+}
+
+bool Printer::runOnFunction(Function &F) {
+ if (!F.hasGC()) return false;
+
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getName() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end(); RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(),
+ PE = FD->end(); PI != PE; ++PI) {
+
+ OS << "\t" << PI->Label->getName() << ": "
+ << DescKind(PI->Kind) << ", live = {";
+
+ for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+ RE = FD->live_end(PI);;) {
+ OS << " " << RI->Num;
+ if (++RI == RE)
+ break;
+ OS << ",";
+ }
+
+ OS << " }\n";
+ }
+
+ return false;
+}
+
+bool Printer::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Printer didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 0000000..f80e9ce
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,27 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() { }
+
+GCMetadataPrinter::~GCMetadataPrinter() { }
+
+void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
+
+void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+ // Default is no action.
+}
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 0000000..1173d11
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,430 @@
+//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements target- and collector-independent garbage collection +// infrastructure. +// +// GCMachineCodeAnalysis identifies the GC safe points in the machine code. +// Roots are identified in SelectionDAGISel. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +namespace { + + /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or + /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as + /// directed by the GCStrategy. It also performs automatic root initialization + /// and custom intrinsic lowering. + class LowerIntrinsics : public FunctionPass { + static bool NeedsDefaultLoweringPass(const GCStrategy &C); + static bool NeedsCustomLoweringPass(const GCStrategy &C); + static bool CouldBecomeSafePoint(Instruction *I); + bool PerformDefaultLowering(Function &F, GCStrategy &Coll); + static bool InsertRootInitializers(Function &F, + AllocaInst **Roots, unsigned Count); + + public: + static char ID; + + LowerIntrinsics(); + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + }; + + + /// GCMachineCodeAnalysis - This is a target-independent pass over the machine + /// function representation to identify safe points for the garbage collector + /// in the machine code. It inserts labels at safe points and populates a + /// GCMetadata record for each function. 
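+ /// Safe points are currently found by bracketing each call instruction
+ /// with GC_LABELs (see VisitCallPoint below).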
+ class GCMachineCodeAnalysis : public MachineFunctionPass { + const TargetMachine *TM; + GCFunctionInfo *FI; + MachineModuleInfo *MMI; + const TargetInstrInfo *TII; + + void FindSafePoints(MachineFunction &MF); + void VisitCallPoint(MachineBasicBlock::iterator MI); + MCSymbol *InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const; + + void FindStackOffsets(MachineFunction &MF); + + public: + static char ID; + + GCMachineCodeAnalysis(); + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnMachineFunction(MachineFunction &MF); + }; + +} + +// ----------------------------------------------------------------------------- + +GCStrategy::GCStrategy() : + NeededSafePoints(0), + CustomReadBarriers(false), + CustomWriteBarriers(false), + CustomRoots(false), + CustomSafePoints(false), + InitRoots(true), + UsesMetadata(false) +{} + +GCStrategy::~GCStrategy() { + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; + + Functions.clear(); +} + +bool GCStrategy::initializeCustomLowering(Module &M) { return false; } + +bool GCStrategy::performCustomLowering(Function &F) { + dbgs() << "gc " << getName() << " must override performCustomLowering.\n"; + llvm_unreachable("must override performCustomLowering"); +} + + +bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) { + dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n"; + llvm_unreachable(0); +} + + +GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { + GCFunctionInfo *FI = new GCFunctionInfo(F, *this); + Functions.push_back(FI); + return FI; +} + +// ----------------------------------------------------------------------------- + +INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", + false, false) +INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) +INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) + +FunctionPass *llvm::createGCLoweringPass() { + return new LowerIntrinsics(); +} + +char LowerIntrinsics::ID = 0; + +LowerIntrinsics::LowerIntrinsics() + : FunctionPass(ID) { + initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + +const char *LowerIntrinsics::getPassName() const { + return "Lower Garbage Collection Instructions"; +} + +void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<GCModuleInfo>(); + AU.addPreserved<DominatorTree>(); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. +bool LowerIntrinsics::doInitialization(Module &M) { + // FIXME: This is rather antisocial in the context of a JIT since it performs + // work against the entire module. But this cannot be done at + // runFunction time (initializeCustomLowering likely needs to change + // the module). + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?"); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration() && I->hasGC()) + MI->getFunctionInfo(*I); // Instantiate the GC strategy. + + bool MadeChange = false; + for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) + if (NeedsCustomLoweringPass(**I)) + if ((*I)->initializeCustomLowering(M)) + MadeChange = true; + + return MadeChange; +} + +bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, + unsigned Count) { + // Scroll past alloca instructions. 
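+ // Static allocas (the GC roots among them) sit at the head of the entry
+ // block; any initializing stores can only appear after them.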
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst*,16> InitedRoots;
+ for (; !CouldBecomeSafePoint(IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>((*I)->getType())->getElementType())),
+ *I);
+ SI->insertAfter(*I);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action. The default for roots is no action.
+ return !C.customWriteBarrier()
+ || !C.customReadBarrier()
+ || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+ // Custom lowering is only necessary if enabled for some action.
+ return C.customWriteBarrier()
+ || C.customReadBarrier()
+ || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+ // The instructions which could naturally introduce safe points are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+ // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<StoreInst>(I) || isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (unsigned IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
+ if (UseCustomLoweringPass)
+ MadeChange |= S.performCustomLowering(F);
+
+ // Custom lowering may modify the CFG, so dominators must be recomputed.
+ if (UseCustomLoweringPass) { + if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) + DT->DT->recalculate(F); + } + + return MadeChange; +} + +bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { + bool LowerWr = !S.customWriteBarrier(); + bool LowerRd = !S.customReadBarrier(); + bool InitRoots = S.initializeRoots(); + + SmallVector<AllocaInst*, 32> Roots; + + bool MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) { + Function *F = CI->getCalledFunction(); + switch (F->getIntrinsicID()) { + case Intrinsic::gcwrite: + if (LowerWr) { + // Replace a write barrier with a simple store. + Value *St = new StoreInst(CI->getArgOperand(0), + CI->getArgOperand(2), CI); + CI->replaceAllUsesWith(St); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcread: + if (LowerRd) { + // Replace a read barrier with a simple load. + Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); + Ld->takeName(CI); + CI->replaceAllUsesWith(Ld); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcroot: + if (InitRoots) { + // Initialize the GC root, but do not delete the intrinsic. The + // backend needs the intrinsic to flag the stack slot. + Roots.push_back(cast<AllocaInst>( + CI->getArgOperand(0)->stripPointerCasts())); + } + break; + default: + continue; + } + + MadeChange = true; + } + } + } + + if (Roots.size()) + MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); + + return MadeChange; +} + +// ----------------------------------------------------------------------------- + +char GCMachineCodeAnalysis::ID = 0; +char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; + +INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", + "Analyze Machine Code For Garbage Collection", false, false) + +GCMachineCodeAnalysis::GCMachineCodeAnalysis() + : MachineFunctionPass(ID) {} + +void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<GCModuleInfo>(); +} + +MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { + MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); + BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); + return Label; +} + +void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { + // Find the return address (next instruction), too, so as to bracket the call + // instruction. 
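+ // A PreCall safe point is inserted immediately before the call, and a
+ // PostCall safe point at the return address immediately after it.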
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
+}
+
+void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(),
+ BBE = MF.end(); BBI != BBE; ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(),
+ ME = BBI->end(); MI != ME; ++MI)
+ if (MI->isCall())
+ VisitCallPoint(MI);
+}
+
+void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+ assert(TFI && "TargetFrameLowering not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
+ RI != FI->roots_end();) {
+ // If the root references a dead object, no need to keep it.
+ if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+ RI = FI->removeStackRoot(RI);
+ } else {
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ ++RI;
+ }
+ }
+}
+
+bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ if (!FI->getStrategy().needsSafePoints())
+ return false;
+
+ TM = &MF.getTarget();
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = TM->getInstrInfo();
+
+ // Find the size of the stack frame.
+ FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+ // Find all safe points.
+ if (FI->getStrategy().customSafePoints()) {
+ FI->getStrategy().findCustomSafePoints(*FI, MF);
+ } else {
+ FindSafePoints(MF);
+ }
+
+ // Find the stack offsets for all roots.
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 0000000..9958d7d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1583 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
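+// If-conversion replaces short conditional regions (triangles and diamonds
+// in the CFG) with straight-line predicated code, eliminating the branches.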
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "llvm/CodeGen/Passes.h"
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+// Hidden options to help debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
+
+namespace {
+ class IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+ ICSimple, // BB is entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
+
+ /// BBInfo - One per MachineBasicBlock, this is used to cache the result
+ /// of if-conversion feasibility analysis. This includes results from
+ /// TargetInstrInfo::AnalyzeBranch() (i.e.
TBB, FBB, and Cond), and its + /// classification, and common tail block of its successors (if it's a + /// diamond shape), its size, whether it's predicable, and whether any + /// instruction can clobber the 'would-be' predicate. + /// + /// IsDone - True if BB is not to be considered for ifcvt. + /// IsBeingAnalyzed - True if BB is currently being analyzed. + /// IsAnalyzed - True if BB has been analyzed (info is still valid). + /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed. + /// IsBrAnalyzable - True if AnalyzeBranch() returns false. + /// HasFallThrough - True if BB may fallthrough to the following BB. + /// IsUnpredicable - True if BB is known to be unpredicable. + /// ClobbersPred - True if BB could modify predicates (e.g. has + /// cmp, call, etc.) + /// NonPredSize - Number of non-predicated instructions. + /// ExtraCost - Extra cost for multi-cycle instructions. + /// ExtraCost2 - Some instructions are slower when predicated + /// BB - Corresponding MachineBasicBlock. + /// TrueBB / FalseBB- See AnalyzeBranch(). + /// BrCond - Conditions for end of block conditional branches. + /// Predicate - Predicate used in the BB. + struct BBInfo { + bool IsDone : 1; + bool IsBeingAnalyzed : 1; + bool IsAnalyzed : 1; + bool IsEnqueued : 1; + bool IsBrAnalyzable : 1; + bool HasFallThrough : 1; + bool IsUnpredicable : 1; + bool CannotBeCopied : 1; + bool ClobbersPred : 1; + unsigned NonPredSize; + unsigned ExtraCost; + unsigned ExtraCost2; + MachineBasicBlock *BB; + MachineBasicBlock *TrueBB; + MachineBasicBlock *FalseBB; + SmallVector<MachineOperand, 4> BrCond; + SmallVector<MachineOperand, 4> Predicate; + BBInfo() : IsDone(false), IsBeingAnalyzed(false), + IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), + HasFallThrough(false), IsUnpredicable(false), + CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), + ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {} + }; + + /// IfcvtToken - Record information about pending if-conversions to attempt: + /// BBI - Corresponding BBInfo. + /// Kind - Type of block. See IfcvtKind. + /// NeedSubsumption - True if the to-be-predicated BB has already been + /// predicated. + /// NumDups - Number of instructions that would be duplicated due + /// to this if-conversion. (For diamonds, the number of + /// identical instructions at the beginnings of both + /// paths). + /// NumDups2 - For diamonds, the number of identical instructions + /// at the ends of both paths. + struct IfcvtToken { + BBInfo &BBI; + IfcvtKind Kind; + bool NeedSubsumption; + unsigned NumDups; + unsigned NumDups2; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) + : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} + }; + + /// BBAnalysis - Results of if-conversion feasibility analysis indexed by + /// basic block number. 
+ std::vector<BBInfo> BBAnalysis; + + const TargetLoweringBase *TLI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; + const MachineBranchProbabilityInfo *MBPI; + MachineRegisterInfo *MRI; + + bool PreRegAlloc; + bool MadeChange; + int FnNum; + public: + static char ID; + IfConverter() : MachineFunctionPass(ID), FnNum(-1) { + initializeIfConverterPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + bool ReverseBranchCondition(BBInfo &BBI); + bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + const BranchProbability &Prediction) const; + bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups, + const BranchProbability &Prediction) const; + bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const; + void ScanInstructions(BBInfo &BBI); + BBInfo &AnalyzeBlock(MachineBasicBlock *BB, + std::vector<IfcvtToken*> &Tokens); + bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, + bool isTriangle = false, bool RevBranch = false); + void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens); + void InvalidatePreds(MachineBasicBlock *BB); + void RemoveExtraEdges(BBInfo &BBI); + bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2); + void PredicateBlock(BBInfo &BBI, + MachineBasicBlock::iterator E, + SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> &Redefs, + SmallSet<unsigned, 4> *LaterRedefs = 0); + void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> &Redefs, + bool IgnoreBr = false); + void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); + + bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, + unsigned Cycle, unsigned Extra, + const BranchProbability &Prediction) const { + return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, + Prediction); + } + + bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, + unsigned TCycle, unsigned TExtra, + MachineBasicBlock &FBB, + unsigned FCycle, unsigned FExtra, + const BranchProbability &Prediction) const { + return TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, + Prediction); + } + + // blockAlwaysFallThrough - Block ends without a terminator. + bool blockAlwaysFallThrough(BBInfo &BBI) const { + return BBI.IsBrAnalyzable && BBI.TrueBB == NULL; + } + + // IfcvtTokenCmp - Used to sort if-conversion candidates. + static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) { + int Incr1 = (C1->Kind == ICDiamond) + ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups; + int Incr2 = (C2->Kind == ICDiamond) + ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups; + if (Incr1 > Incr2) + return true; + else if (Incr1 == Incr2) { + // Favors subsumption. + if (C1->NeedSubsumption == false && C2->NeedSubsumption == true) + return true; + else if (C1->NeedSubsumption == C2->NeedSubsumption) { + // Favors diamond over triangle, etc. 
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+ char IfConverter::ID = 0;
+}
+
+char &llvm::IfConverterID = IfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ TLI = MF.getTarget().getTargetLowering();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MRI = &MF.getRegInfo();
+ InstrItins = MF.getTarget().getInstrItineraryData();
+ if (!TII) return false;
+
+ PreRegAlloc = MRI->isSSA();
+
+ bool BFChange = false;
+ if (!PreRegAlloc) {
+ // Tail merging tends to expose more if-conversion opportunities.
+ BranchFolder BF(true, false);
+ BFChange = BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getName() << "\'");
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DEBUG(dbgs() << " skipped\n");
+ return false;
+ }
+ DEBUG(dbgs() << "\n");
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ std::vector<IfcvtToken*> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ IfcvtToken *Token = Tokens.back();
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ delete Token;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected!");
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
+ " false" : "")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ");
+ RetVal = IfConvertSimple(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!"
: "failed!") << "\n"); + if (RetVal) { + if (isFalse) ++NumSimpleFalse; + else ++NumSimple; + } + break; + } + case ICTriangle: + case ICTriangleRev: + case ICTriangleFalse: + case ICTriangleFRev: { + bool isFalse = Kind == ICTriangleFalse; + bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev); + if (DisableTriangle && !isFalse && !isRev) break; + if (DisableTriangleR && !isFalse && isRev) break; + if (DisableTriangleF && isFalse && !isRev) break; + if (DisableTriangleFR && isFalse && isRev) break; + DEBUG(dbgs() << "Ifcvt (Triangle"); + if (isFalse) + DEBUG(dbgs() << " false"); + if (isRev) + DEBUG(dbgs() << " rev"); + DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertTriangle(BBI, Kind); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) { + if (isFalse) { + if (isRev) ++NumTriangleFRev; + else ++NumTriangleFalse; + } else { + if (isRev) ++NumTriangleRev; + else ++NumTriangle; + } + } + break; + } + case ICDiamond: { + if (DisableDiamond) break; + DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) ++NumDiamonds; + break; + } + } + + Change |= RetVal; + + NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev + + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit) + break; + } + + if (!Change) + break; + MadeChange |= Change; + } + + // Delete tokens in case of early exit. + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + delete Token; + } + + Tokens.clear(); + BBAnalysis.clear(); + + if (MadeChange && IfCvtBranchFold) { + BranchFolder BF(false, false); + BF.OptimizeFunction(MF, TII, + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); + } + + MadeChange |= BFChange; + return MadeChange; +} + +/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given +/// its 'true' successor. +static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, + MachineBasicBlock *TrueBB) { + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + E = BB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBB = *SI; + if (SuccBB != TrueBB) + return SuccBB; + } + return NULL; +} + +/// ReverseBranchCondition - Reverse the condition of the end of the block +/// branch. Swap block's 'true' and 'false' successors. +bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { + DebugLoc dl; // FIXME: this is nowhere + if (!TII->ReverseBranchCondition(BBI.BrCond)) { + TII->RemoveBranch(*BBI.BB); + TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); + std::swap(BBI.TrueBB, BBI.FalseBB); + return true; + } + return false; +} + +/// getNextBlock - Returns the next block in the function blocks ordering. If +/// it is the end, returns NULL. +static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { + MachineFunction::iterator I = BB; + MachineFunction::iterator E = BB->getParent()->end(); + if (++I == E) + return NULL; + return I; +} + +/// ValidSimple - Returns true if the 'true' block (along with its +/// predecessor) forms a valid simple shape for ifcvt. 
It also returns the +/// number of instructions that the ifcvt would need to duplicate if performed +/// in Dups. +bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + const BranchProbability &Prediction) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.IsBrAnalyzable) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied || + !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize, + Prediction)) + return false; + Dups = TrueBBI.NonPredSize; + } + + return true; +} + +/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid triangle shape for ifcvt. +/// If 'FalseBranch' is true, it checks if 'true' block's false branch +/// branches to the 'false' block rather than the other way around. It also +/// returns the number of instructions that the ifcvt would need to duplicate +/// if performed in 'Dups'. +bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups, + const BranchProbability &Prediction) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied) + return false; + + unsigned Size = TrueBBI.NonPredSize; + if (TrueBBI.IsBrAnalyzable) { + if (TrueBBI.TrueBB && TrueBBI.BrCond.empty()) + // Ends with an unconditional branch. It will be removed. + --Size; + else { + MachineBasicBlock *FExit = FalseBranch + ? TrueBBI.TrueBB : TrueBBI.FalseBB; + if (FExit) + // Require a conditional branch + ++Size; + } + } + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction)) + return false; + Dups = Size; + } + + MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB; + if (!TExit && blockAlwaysFallThrough(TrueBBI)) { + MachineFunction::iterator I = TrueBBI.BB; + if (++I == TrueBBI.BB->getParent()->end()) + return false; + TExit = I; + } + return TExit && TExit == FalseBBI.BB; +} + +/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid diamond shape for ifcvt. +bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + + if (!TT && blockAlwaysFallThrough(TrueBBI)) + TT = getNextBlock(TrueBBI.BB); + if (!FT && blockAlwaysFallThrough(FalseBBI)) + FT = getNextBlock(FalseBBI.BB); + if (TT != FT) + return false; + if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) + return false; + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // FIXME: Allow true block to have an early exit? + if (TrueBBI.FalseBB || FalseBBI.FalseBB || + (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) + return false; + + // Count duplicate instructions at the beginning of the true and false blocks. + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + while (TIB != TIE && FIB != FIE) { + // Skip dbg_value instructions. These do not count. 
+ if (TIB->isDebugValue()) { + while (TIB != TIE && TIB->isDebugValue()) + ++TIB; + if (TIB == TIE) + break; + } + if (FIB->isDebugValue()) { + while (FIB != FIE && FIB->isDebugValue()) + ++FIB; + if (FIB == FIE) + break; + } + if (!TIB->isIdenticalTo(FIB)) + break; + ++Dups1; + ++TIB; + ++FIB; + } + + // Now, in preparation for counting duplicate instructions at the ends of the + // blocks, move the end iterators up past any branch instructions. + while (TIE != TIB) { + --TIE; + if (!TIE->isBranch()) + break; + } + while (FIE != FIB) { + --FIE; + if (!FIE->isBranch()) + break; + } + + // If Dups1 includes all of a block, then don't count duplicate + // instructions at the end of the blocks. + if (TIB == TIE || FIB == FIE) + return true; + + // Count duplicate instructions at the ends of the blocks. + while (TIE != TIB && FIE != FIB) { + // Skip dbg_value instructions. These do not count. + if (TIE->isDebugValue()) { + while (TIE != TIB && TIE->isDebugValue()) + --TIE; + if (TIE == TIB) + break; + } + if (FIE->isDebugValue()) { + while (FIE != FIB && FIE->isDebugValue()) + --FIE; + if (FIE == FIB) + break; + } + if (!TIE->isIdenticalTo(FIE)) + break; + ++Dups2; + --TIE; + --FIE; + } + + return true; +} + +/// ScanInstructions - Scan all the instructions in the block to determine if +/// the block is predicable. In most cases, that means all the instructions +/// in the block are isPredicable(). Also checks if the block contains any +/// instruction which can clobber a predicate (e.g. condition code register). +/// If so, the block is not predicable unless it's the last instruction. +void IfConverter::ScanInstructions(BBInfo &BBI) { + if (BBI.IsDone) + return; + + bool AlreadyPredicated = !BBI.Predicate.empty(); + // First analyze the end of BB branches. + BBI.TrueBB = BBI.FalseBB = NULL; + BBI.BrCond.clear(); + BBI.IsBrAnalyzable = + !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL; + + if (BBI.BrCond.size()) { + // No false branch. This BB must end with a conditional branch and a + // fallthrough. + if (!BBI.FalseBB) + BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); + if (!BBI.FalseBB) { + // Malformed bcc? True and false blocks are the same? + BBI.IsUnpredicable = true; + return; + } + } + + // Then scan all the instructions. + BBI.NonPredSize = 0; + BBI.ExtraCost = 0; + BBI.ExtraCost2 = 0; + BBI.ClobbersPred = false; + for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); + I != E; ++I) { + if (I->isDebugValue()) + continue; + + if (I->isNotDuplicable()) + BBI.CannotBeCopied = true; + + bool isPredicated = TII->isPredicated(I); + bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); + + if (!isCondBr) { + if (!isPredicated) { + BBI.NonPredSize++; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, + &ExtraPredCost); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; + } else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. + BBI.IsUnpredicable = true; + return; + } + } + + if (BBI.ClobbersPred && !isPredicated) { + // Predicate modification instruction should end the block (except for + // already predicated instructions and end of block branches). + if (isCondBr) { + // A conditional branch is not predicable, but it may be eliminated. 
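+      // Hedged example (generic condition-code target, invented operands):
+      //     cmp r0, #0      ; clobbers the predicate
+      //     bne L1          ; conditional branch, tolerated here because
+      //                     ; ifcvt may eliminate it
+      // whereas an ordinary unpredicated ALU instruction after the cmp
+      // would fall through to the IsUnpredicable case below.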
+ continue; + } + + // Predicate may have been modified, the subsequent (currently) + // unpredicated instructions cannot be correctly predicated. + BBI.IsUnpredicable = true; + return; + } + + // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are + // still potentially predicable. + std::vector<MachineOperand> PredDefs; + if (TII->DefinesPredicate(I, PredDefs)) + BBI.ClobbersPred = true; + + if (!TII->isPredicable(I)) { + BBI.IsUnpredicable = true; + return; + } + } +} + +/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be +/// predicated by the specified predicate. +bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, + SmallVectorImpl<MachineOperand> &Pred, + bool isTriangle, bool RevBranch) { + // If the block is dead or unpredicable, then it cannot be predicated. + if (BBI.IsDone || BBI.IsUnpredicable) + return false; + + // If it is already predicated, check if its predicate subsumes the new + // predicate. + if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred)) + return false; + + if (BBI.BrCond.size()) { + if (!isTriangle) + return false; + + // Test predicate subsumption. + SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end()); + SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (RevBranch) { + if (TII->ReverseBranchCondition(Cond)) + return false; + } + if (TII->ReverseBranchCondition(RevPred) || + !TII->SubsumesPredicate(Cond, RevPred)) + return false; + } + + return true; +} + +/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from +/// the specified block. Record its successors and whether it looks like an +/// if-conversion candidate. +IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, + std::vector<IfcvtToken*> &Tokens) { + BBInfo &BBI = BBAnalysis[BB->getNumber()]; + + if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed) + return BBI; + + BBI.BB = BB; + BBI.IsBeingAnalyzed = true; + + ScanInstructions(BBI); + + // Unanalyzable or ends with fallthrough or unconditional branch, or if is not + // considered for ifcvt anymore. + if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + // Do not ifcvt if either path is a back edge to the entry block. + if (BBI.TrueBB == BB || BBI.FalseBB == BB) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + // Do not ifcvt if true and false fallthrough blocks are the same. 
+ if (!BBI.FalseBB) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens); + BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens); + + if (TrueBBI.IsDone && FalseBBI.IsDone) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + bool CanRevCond = !TII->ReverseBranchCondition(RevCond); + + unsigned Dups = 0; + unsigned Dups2 = 0; + bool TNeedSub = !TrueBBI.Predicate.empty(); + bool FNeedSub = !FalseBBI.Predicate.empty(); + bool Enqueued = false; + + BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB); + + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && + MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + + TrueBBI.ExtraCost), TrueBBI.ExtraCost2, + *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + + FalseBBI.ExtraCost),FalseBBI.ExtraCost2, + Prediction) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond) && + FeasibilityAnalysis(FalseBBI, RevCond)) { + // Diamond: + // EBB + // / \_ + // | | + // TBB FBB + // \ / + // TailBB + // Note TailBB can be empty. + Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups, + Dups2)); + Enqueued = true; + } + + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { + // Triangle: + // EBB + // | \_ + // | | + // | TBB + // | / + // FBB + Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups)); + Enqueued = true; + } + + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); + Enqueued = true; + } + + if (ValidSimple(TrueBBI, Dups, Prediction) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { + // Simple (split, no rejoin): + // EBB + // | \_ + // | | + // | TBB---> exit + // | + // FBB + Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups)); + Enqueued = true; + } + + if (CanRevCond) { + // Try the other path... 
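+    // Hedged sketch: if the entry branch is "beq TBB" with a fallthrough to
+    // FBB, reversing it yields "bne FBB", letting the false block play the
+    // converted role; the checks below mirror the ones above, using the
+    // complemented edge probability (opcodes invented for illustration).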
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups, + Prediction.getCompl()) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, Prediction.getCompl()) && + FeasibilityAnalysis(FalseBBI, RevCond, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); + Enqueued = true; + } + + if (ValidTriangle(FalseBBI, TrueBBI, true, Dups, + Prediction.getCompl()) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, Prediction.getCompl()) && + FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); + Enqueued = true; + } + + if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, Prediction.getCompl()) && + FeasibilityAnalysis(FalseBBI, RevCond)) { + Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); + Enqueued = true; + } + } + + BBI.IsEnqueued = Enqueued; + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; +} + +/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion +/// candidates. +void IfConverter::AnalyzeBlocks(MachineFunction &MF, + std::vector<IfcvtToken*> &Tokens) { + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + AnalyzeBlock(BB, Tokens); + } + + // Sort to favor more complex ifcvt scheme. + std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); +} + +/// canFallThroughTo - Returns true either if ToBB is the next block after BB or +/// that all the intervening blocks are empty (given BB can fall through to its +/// next block). +static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { + MachineFunction::iterator PI = BB; + MachineFunction::iterator I = llvm::next(PI); + MachineFunction::iterator TI = ToBB; + MachineFunction::iterator E = BB->getParent()->end(); + while (I != TI) { + // Check isSuccessor to avoid case where the next block is empty, but + // it's not a successor. + if (I == E || !I->empty() || !PI->isSuccessor(I)) + return false; + PI = I++; + } + return true; +} + +/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed +/// to determine if it can be if-converted. If predecessor is already enqueued, +/// dequeue it! +void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) { + BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()]; + if (PBBI.IsDone || PBBI.BB == BB) + continue; + PBBI.IsAnalyzed = false; + PBBI.IsEnqueued = false; + } +} + +/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB. +/// +static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB, + const TargetInstrInfo *TII) { + DebugLoc dl; // FIXME: this is nowhere + SmallVector<MachineOperand, 0> NoCond; + TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl); +} + +/// RemoveExtraEdges - Remove true / false edges if either / both are no longer +/// successors. 
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+  MachineBasicBlock *TBB = NULL, *FBB = NULL;
+  SmallVector<MachineOperand, 4> Cond;
+  if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+    BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
+/// modeled as read + write (sort of like two-address instructions). These
+/// routines track register liveness and add implicit uses to if-converted
+/// instructions to conform to the model.
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
+                           const TargetRegisterInfo *TRI) {
+  for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+         E = BB->livein_end(); I != E; ++I) {
+    unsigned Reg = *I;
+    Redefs.insert(Reg);
+    for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+      Redefs.insert(*SubRegs);
+  }
+}
+
+static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
+                             const TargetRegisterInfo *TRI,
+                             bool AddImpUse = false) {
+  SmallVector<unsigned, 4> Defs;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isDef())
+      Defs.push_back(Reg);
+    else if (MO.isKill()) {
+      Redefs.erase(Reg);
+      for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+        Redefs.erase(*SubRegs);
+    }
+  }
+  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+    unsigned Reg = Defs[i];
+    if (!Redefs.insert(Reg)) {
+      if (AddImpUse)
+        // Treat predicated update as read + write.
+        MIB.addReg(Reg, RegState::Implicit | RegState::Undef);
+    } else {
+      for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+        Redefs.insert(*SubRegs);
+    }
+  }
+}
+
+static void UpdatePredRedefs(MachineBasicBlock::iterator I,
+                             MachineBasicBlock::iterator E,
+                             SmallSet<unsigned,4> &Redefs,
+                             const TargetRegisterInfo *TRI) {
+  while (I != E) {
+    UpdatePredRedefs(I, Redefs, TRI);
+    ++I;
+  }
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()];
+  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+  BBInfo *CvtBBI = &TrueBBI;
+  BBInfo *NextBBI = &FalseBBI;
+
+  SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+  if (Kind == ICSimpleFalse)
+    std::swap(CvtBBI, NextBBI);
+
+  if (CvtBBI->IsDone ||
+      (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+    // Something has changed. It's no longer safe to predicate this block.
+    BBI.IsAnalyzed = false;
+    CvtBBI->IsAnalyzed = false;
+    return false;
+  }
+
+  if (Kind == ICSimpleFalse)
+    if (TII->ReverseBranchCondition(Cond))
+      llvm_unreachable("Unable to reverse branch condition!");
+
+  // Initialize liveins to the first BB. These are potentially redefined by
+  // predicated instructions.
+  SmallSet<unsigned, 4> Redefs;
+  InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+  InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+  if (CvtBBI->BB->pred_size() > 1) {
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    // Copy instructions in the true block, predicate them, and add them to
+    // the entry block.
+    CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+  } else {
+    PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+    // Merge converted block into entry block.
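+    // Rough before/after sketch (pseudo machine code, invented operands):
+    // predicating
+    //     TBB: mov r0, #1
+    //          str r0, [r1]
+    // on condition "eq" and splicing it into the entry block yields
+    //     EBB: ...
+    //          moveq r0, #1
+    //          streq r0, [r1]
+    // with an explicit branch to the other successor appended later if the
+    // layout fallthrough is lost.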
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, *CvtBBI); + } + + bool IterIfcvt = true; + if (!canFallThroughTo(BBI.BB, NextBBI->BB)) { + InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + BBI.HasFallThrough = false; + // Now ifcvt'd block will look like this: + // BB: + // ... + // t, f = cmp + // if t op + // b BBf + // + // We cannot further ifcvt this block because the unconditional branch + // will have to be predicated on the new condition, that will not be + // available if cmp executes. + IterIfcvt = false; + } + + RemoveExtraEdges(BBI); + + // Update block info. BB can be iteratively if-converted. + if (!IterIfcvt) + BBI.IsDone = true; + InvalidatePreds(BBI.BB); + CvtBBI->IsDone = true; + + // FIXME: Must maintain LiveIns. + return true; +} + +/// IfConvertTriangle - If convert a triangle sub-CFG. +/// +bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + BBInfo *CvtBBI = &TrueBBI; + BBInfo *NextBBI = &FalseBBI; + DebugLoc dl; // FIXME: this is nowhere + + SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) + std::swap(CvtBBI, NextBBI); + + if (CvtBBI->IsDone || + (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + // Something has changed. It's no longer safe to predicate this block. + BBI.IsAnalyzed = false; + CvtBBI->IsAnalyzed = false; + return false; + } + + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) + if (TII->ReverseBranchCondition(Cond)) + llvm_unreachable("Unable to reverse branch condition!"); + + if (Kind == ICTriangleRev || Kind == ICTriangleFRev) { + if (ReverseBranchCondition(*CvtBBI)) { + // BB has been changed, modify its predecessors (except for this + // one) so they don't get ifcvt'ed based on bad intel. + for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(), + E = CvtBBI->BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PBB = *PI; + if (PBB == BBI.BB) + continue; + BBInfo &PBBI = BBAnalysis[PBB->getNumber()]; + if (PBBI.IsEnqueued) { + PBBI.IsAnalyzed = false; + PBBI.IsEnqueued = false; + } + } + } + } + + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + SmallSet<unsigned, 4> Redefs; + InitPredRedefs(CvtBBI->BB, Redefs, TRI); + InitPredRedefs(NextBBI->BB, Redefs, TRI); + + bool HasEarlyExit = CvtBBI->FalseBB != NULL; + if (CvtBBI->BB->pred_size() > 1) { + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Copy instructions in the true block, predicate them, and add them to + // the entry block. + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); + } else { + // Predicate the 'true' block after removing its branch. + CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + + // Now merge the entry of the triangle with the true block. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, *CvtBBI, false); + } + + // If 'true' block has a 'false' successor, add an exit branch to it. 
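+  // Hedged sketch: when the converted block could exit early to its 'false'
+  // successor, that exit fires exactly when the block's own branch condition
+  // does not hold, so the branch re-inserted below uses the reversed
+  // condition.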
+ if (HasEarlyExit) { + SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), + CvtBBI->BrCond.end()); + if (TII->ReverseBranchCondition(RevCond)) + llvm_unreachable("Unable to reverse branch condition!"); + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); + BBI.BB->addSuccessor(CvtBBI->FalseBB); + } + + // Merge in the 'false' block if the 'false' block has no other + // predecessors. Otherwise, add an unconditional branch to 'false'. + bool FalseBBDead = false; + bool IterIfcvt = true; + bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB); + if (!isFallThrough) { + // Only merge them if the true block does not fallthrough to the false + // block. By not merging them, we make it possible to iteratively + // ifcvt the blocks. + if (!HasEarlyExit && + NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) { + MergeBlocks(BBI, *NextBBI); + FalseBBDead = true; + } else { + InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + BBI.HasFallThrough = false; + } + // Mixed predicated and unpredicated code. This cannot be iteratively + // predicated. + IterIfcvt = false; + } + + RemoveExtraEdges(BBI); + + // Update block info. BB can be iteratively if-converted. + if (!IterIfcvt) + BBI.IsDone = true; + InvalidatePreds(BBI.BB); + CvtBBI->IsDone = true; + if (FalseBBDead) + NextBBI->IsDone = true; + + // FIXME: Must maintain LiveIns. + return true; +} + +/// IfConvertDiamond - If convert a diamond sub-CFG. +/// +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + MachineBasicBlock *TailBB = TrueBBI.TrueBB; + // True block must fall through or end with an unanalyzable terminator. + if (!TailBB) { + if (blockAlwaysFallThrough(TrueBBI)) + TailBB = FalseBBI.TrueBB; + assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); + } + + if (TrueBBI.IsDone || FalseBBI.IsDone || + TrueBBI.BB->pred_size() > 1 || + FalseBBI.BB->pred_size() > 1) { + // Something has changed. It's no longer safe to predicate these blocks. + BBI.IsAnalyzed = false; + TrueBBI.IsAnalyzed = false; + FalseBBI.IsAnalyzed = false; + return false; + } + + // Put the predicated instructions from the 'true' block before the + // instructions from the 'false' block, unless the true block would clobber + // the predicate, in which case, do the opposite. + BBInfo *BBI1 = &TrueBBI; + BBInfo *BBI2 = &FalseBBI; + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (TII->ReverseBranchCondition(RevCond)) + llvm_unreachable("Unable to reverse branch condition!"); + SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond; + SmallVector<MachineOperand, 4> *Cond2 = &RevCond; + + // Figure out the more profitable ordering. + bool DoSwap = false; + if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred) + DoSwap = true; + else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) { + if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) + DoSwap = true; + } + if (DoSwap) { + std::swap(BBI1, BBI2); + std::swap(Cond1, Cond2); + } + + // Remove the conditional branch from entry to the blocks. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + SmallSet<unsigned, 4> Redefs; + InitPredRedefs(BBI1->BB, Redefs, TRI); + + // Remove the duplicated instructions at the beginnings of both paths. 
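+  // Illustration (invented opcodes): if both paths begin with
+  //     ldr r2, [sp]
+  //     add r2, r2, #4
+  // then NumDups1 == 2 and the pair is hoisted unpredicated into the entry
+  // block once, instead of being emitted twice under complementary
+  // predicates.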
+  MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+  MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+  MachineBasicBlock::iterator DIE1 = BBI1->BB->end();
+  MachineBasicBlock::iterator DIE2 = BBI2->BB->end();
+  // Skip dbg_value instructions
+  while (DI1 != DIE1 && DI1->isDebugValue())
+    ++DI1;
+  while (DI2 != DIE2 && DI2->isDebugValue())
+    ++DI2;
+  BBI1->NonPredSize -= NumDups1;
+  BBI2->NonPredSize -= NumDups1;
+
+  // Skip past the dups on each side separately since there may be
+  // differing dbg_value entries.
+  for (unsigned i = 0; i < NumDups1; ++DI1) {
+    if (!DI1->isDebugValue())
+      ++i;
+  }
+  while (NumDups1 != 0) {
+    ++DI2;
+    if (!DI2->isDebugValue())
+      --NumDups1;
+  }
+
+  UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI);
+  BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+  BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+  // Remove branch from 'true' block and remove duplicated instructions.
+  BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+  DI1 = BBI1->BB->end();
+  for (unsigned i = 0; i != NumDups2; ) {
+    // NumDups2 only counted non-dbg_value instructions, so this won't
+    // run off the head of the list.
+    assert(DI1 != BBI1->BB->begin());
+    --DI1;
+    // skip dbg_value instructions
+    if (!DI1->isDebugValue())
+      ++i;
+  }
+  BBI1->BB->erase(DI1, BBI1->BB->end());
+
+  // Remove 'false' block branch and find the last instruction to predicate.
+  BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+  DI2 = BBI2->BB->end();
+  while (NumDups2 != 0) {
+    // NumDups2 only counted non-dbg_value instructions, so this won't
+    // run off the head of the list.
+    assert(DI2 != BBI2->BB->begin());
+    --DI2;
+    // skip dbg_value instructions
+    if (!DI2->isDebugValue())
+      --NumDups2;
+  }
+
+  // Remember which registers would later be defined by the false block.
+  // This allows us not to predicate instructions in the true block that would
+  // later be re-defined. That is, rather than
+  //   subeq  r0, r1, #1
+  //   addne  r0, r1, #1
+  // generate:
+  //   sub    r0, r1, #1
+  //   addne  r0, r1, #1
+  SmallSet<unsigned, 4> RedefsByFalse;
+  SmallSet<unsigned, 4> ExtUses;
+  if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) {
+    for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) {
+      if (FI->isDebugValue())
+        continue;
+      SmallVector<unsigned, 4> Defs;
+      for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+        const MachineOperand &MO = FI->getOperand(i);
+        if (!MO.isReg())
+          continue;
+        unsigned Reg = MO.getReg();
+        if (!Reg)
+          continue;
+        if (MO.isDef()) {
+          Defs.push_back(Reg);
+        } else if (!RedefsByFalse.count(Reg)) {
+          // These are defined before control flow reaches the 'false'
+          // instructions. They cannot be modified by the 'true' instructions.
+          ExtUses.insert(Reg);
+          for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+            ExtUses.insert(*SubRegs);
+        }
+      }
+
+      for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+        unsigned Reg = Defs[i];
+        if (!ExtUses.count(Reg)) {
+          RedefsByFalse.insert(Reg);
+          for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+            RedefsByFalse.insert(*SubRegs);
+        }
+      }
+    }
+  }
+
+  // Predicate the 'true' block.
+  PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse);
+
+  // Predicate the 'false' block.
+  PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
+
+  // Merge the true block into the entry of the diamond.
+  MergeBlocks(BBI, *BBI1, TailBB == 0);
+  MergeBlocks(BBI, *BBI2, TailBB == 0);
+
+  // If the if-converted block falls through or unconditionally branches into
+  // the tail block, and the tail block does not have other predecessors, then
+  // fold the tail block in as well. Otherwise, unless it falls through to the
+  // tail, add an unconditional branch to it.
+  if (TailBB) {
+    BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
+    bool CanMergeTail = !TailBBI.HasFallThrough;
+    // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+    // check if there are any other predecessors besides those.
+    unsigned NumPreds = TailBB->pred_size();
+    if (NumPreds > 1)
+      CanMergeTail = false;
+    else if (NumPreds == 1 && CanMergeTail) {
+      MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+      if (*PI != BBI1->BB && *PI != BBI2->BB)
+        CanMergeTail = false;
+    }
+    if (CanMergeTail) {
+      MergeBlocks(BBI, TailBBI);
+      TailBBI.IsDone = true;
+    } else {
+      BBI.BB->addSuccessor(TailBB);
+      InsertUncondBranch(BBI.BB, TailBB, TII);
+      BBI.HasFallThrough = false;
+    }
+  }
+
+  // RemoveExtraEdges won't work if the block has an unanalyzable branch,
+  // which can happen here if TailBB is unanalyzable and is merged, so
+  // explicitly remove BBI1 and BBI2 as successors.
+  BBI.BB->removeSuccessor(BBI1->BB);
+  BBI.BB->removeSuccessor(BBI2->BB);
+  RemoveExtraEdges(BBI);
+
+  // Update block info.
+  BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+  InvalidatePreds(BBI.BB);
+
+  // FIXME: Must maintain LiveIns.
+  return true;
+}
+
+static bool MaySpeculate(const MachineInstr *MI,
+                         SmallSet<unsigned, 4> &LaterRedefs,
+                         const TargetInstrInfo *TII) {
+  bool SawStore = true;
+  if (!MI->isSafeToMove(TII, 0, SawStore))
+    return false;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isDef() && !LaterRedefs.count(Reg))
+      return false;
+  }
+
+  return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+                                 MachineBasicBlock::iterator E,
+                                 SmallVectorImpl<MachineOperand> &Cond,
+                                 SmallSet<unsigned, 4> &Redefs,
+                                 SmallSet<unsigned, 4> *LaterRedefs) {
+  bool AnyUnpred = false;
+  bool MaySpec = LaterRedefs != 0;
+  for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+    if (I->isDebugValue() || TII->isPredicated(I))
+      continue;
+    // It may be possible not to predicate an instruction if it's the 'true'
+    // side of a diamond and the 'false' side may re-define the instruction's
+    // defs.
+    if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) {
+      AnyUnpred = true;
+      continue;
+    }
+    // If any instruction is predicated, then every instruction after it must
+    // be predicated.
+    MaySpec = false;
+    if (!TII->PredicateInstruction(I, Cond)) {
+#ifndef NDEBUG
+      dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+      llvm_unreachable(0);
+    }
+
+    // If the predicated instruction now redefines a register as the result of
+    // if-conversion, add an implicit kill.
+    UpdatePredRedefs(I, Redefs, TRI, true);
+  }
+
+  std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+  BBI.IsAnalyzed = false;
+  BBI.NonPredSize = 0;
+
+  ++NumIfConvBBs;
+  if (AnyUnpred)
+    ++NumUnpred;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block.
Skip end of block branches if IgnoreBr is true. +void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> &Redefs, + bool IgnoreBr) { + MachineFunction &MF = *ToBBI.BB->getParent(); + + for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), + E = FromBBI.BB->end(); I != E; ++I) { + // Do not copy the end of the block branches. + if (IgnoreBr && I->isBranch()) + break; + + MachineInstr *MI = MF.CloneMachineInstr(I); + ToBBI.BB->insert(ToBBI.BB->end(), MI); + ToBBI.NonPredSize++; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost); + if (NumCycles > 1) + ToBBI.ExtraCost += NumCycles-1; + ToBBI.ExtraCost2 += ExtraPredCost; + + if (!TII->isPredicated(I) && !MI->isDebugValue()) { + if (!TII->PredicateInstruction(MI, Cond)) { +#ifndef NDEBUG + dbgs() << "Unable to predicate " << *I << "!\n"; +#endif + llvm_unreachable(0); + } + } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(MI, Redefs, TRI, true); + } + + if (!IgnoreBr) { + std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + ToBBI.BB->addSuccessor(Succ); + } + } + + std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), + std::back_inserter(ToBBI.Predicate)); + std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate)); + + ToBBI.ClobbersPred |= FromBBI.ClobbersPred; + ToBBI.IsAnalyzed = false; + + ++NumDupBBs; +} + +/// MergeBlocks - Move all instructions from FromBB to the end of ToBB. +/// This will leave FromBB as an empty block, so remove all of its +/// successor edges except for the fall-through edge. If AddEdges is true, +/// i.e., when FromBBI's branch is being moved, add those successor edges to +/// ToBBI. +void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { + ToBBI.BB->splice(ToBBI.BB->end(), + FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); + + std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + FromBBI.BB->removeSuccessor(Succ); + if (AddEdges && !ToBBI.BB->isSuccessor(Succ)) + ToBBI.BB->addSuccessor(Succ); + } + + // Now FromBBI always falls through to the next block! 
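+  // Schematic example: if FromBB had successors {X, NBB} with NBB its layout
+  // successor, X is transferred to ToBB (when AddEdges is set) while the
+  // emptied FromBB is left with only the fallthrough edge restored below.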
+ if (NBB && !FromBBI.BB->isSuccessor(NBB)) + FromBBI.BB->addSuccessor(NBB); + + std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), + std::back_inserter(ToBBI.Predicate)); + FromBBI.Predicate.clear(); + + ToBBI.NonPredSize += FromBBI.NonPredSize; + ToBBI.ExtraCost += FromBBI.ExtraCost; + ToBBI.ExtraCost2 += FromBBI.ExtraCost2; + FromBBI.NonPredSize = 0; + FromBBI.ExtraCost = 0; + FromBBI.ExtraCost2 = 0; + + ToBBI.ClobbersPred |= FromBBI.ClobbersPred; + ToBBI.HasFallThrough = FromBBI.HasFallThrough; + ToBBI.IsAnalyzed = false; + FromBBI.IsAnalyzed = false; +} diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp new file mode 100644 index 0000000..c6d1a18 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -0,0 +1,1295 @@ +//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The inline spiller modifies the machine function directly instead of +// inserting spills and restores in VirtRegMap. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "Spiller.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +STATISTIC(NumSpilledRanges, "Number of spilled live ranges"); +STATISTIC(NumSnippets, "Number of spilled snippets"); +STATISTIC(NumSpills, "Number of spills inserted"); +STATISTIC(NumSpillsRemoved, "Number of spills removed"); +STATISTIC(NumReloads, "Number of reloads inserted"); +STATISTIC(NumReloadsRemoved, "Number of reloads removed"); +STATISTIC(NumFolded, "Number of folded stack accesses"); +STATISTIC(NumFoldedLoads, "Number of folded loads"); +STATISTIC(NumRemats, "Number of rematerialized defs for spilling"); +STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads"); +STATISTIC(NumHoists, "Number of hoisted spills"); + +static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden, + cl::desc("Disable inline spill hoisting")); + +namespace { +class InlineSpiller : public Spiller { + MachineFunction &MF; + LiveIntervals &LIS; + LiveStacks &LSS; + AliasAnalysis *AA; + MachineDominatorTree &MDT; + MachineLoopInfo &Loops; + VirtRegMap &VRM; + MachineFrameInfo &MFI; + MachineRegisterInfo &MRI; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + + // Variables that are valid during spill(), but used by multiple methods. + LiveRangeEdit *Edit; + LiveInterval *StackInt; + int StackSlot; + unsigned Original; + + // All registers to spill to StackSlot, including the main register. 
+ SmallVector<unsigned, 8> RegsToSpill; + + // All COPY instructions to/from snippets. + // They are ignored since both operands refer to the same stack slot. + SmallPtrSet<MachineInstr*, 8> SnippetCopies; + + // Values that failed to remat at some point. + SmallPtrSet<VNInfo*, 8> UsedValues; + +public: + // Information about a value that was defined by a copy from a sibling + // register. + struct SibValueInfo { + // True when all reaching defs were reloads: No spill is necessary. + bool AllDefsAreReloads; + + // True when value is defined by an original PHI not from splitting. + bool DefByOrigPHI; + + // True when the COPY defining this value killed its source. + bool KillsSource; + + // The preferred register to spill. + unsigned SpillReg; + + // The value of SpillReg that should be spilled. + VNInfo *SpillVNI; + + // The block where SpillVNI should be spilled. Currently, this must be the + // block containing SpillVNI->def. + MachineBasicBlock *SpillMBB; + + // A defining instruction that is not a sibling copy or a reload, or NULL. + // This can be used as a template for rematerialization. + MachineInstr *DefMI; + + // List of values that depend on this one. These values are actually the + // same, but live range splitting has placed them in different registers, + // or SSA update needed to insert PHI-defs to preserve SSA form. This is + // copies of the current value and phi-kills. Usually only phi-kills cause + // more than one dependent value. + TinyPtrVector<VNInfo*> Deps; + + SibValueInfo(unsigned Reg, VNInfo *VNI) + : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false), + SpillReg(Reg), SpillVNI(VNI), SpillMBB(0), DefMI(0) {} + + // Returns true when a def has been found. + bool hasDef() const { return DefByOrigPHI || DefMI; } + }; + +private: + // Values in RegsToSpill defined by sibling copies. + typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap; + SibValueMap SibValues; + + // Dead defs generated during spilling. 
+ SmallVector<MachineInstr*, 8> DeadDefs; + + ~InlineSpiller() {} + +public: + InlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) + : MF(mf), + LIS(pass.getAnalysis<LiveIntervals>()), + LSS(pass.getAnalysis<LiveStacks>()), + AA(&pass.getAnalysis<AliasAnalysis>()), + MDT(pass.getAnalysis<MachineDominatorTree>()), + Loops(pass.getAnalysis<MachineLoopInfo>()), + VRM(vrm), + MFI(*mf.getFrameInfo()), + MRI(mf.getRegInfo()), + TII(*mf.getTarget().getInstrInfo()), + TRI(*mf.getTarget().getRegisterInfo()) {} + + void spill(LiveRangeEdit &); + +private: + bool isSnippet(const LiveInterval &SnipLI); + void collectRegsToSpill(); + + bool isRegToSpill(unsigned Reg) { + return std::find(RegsToSpill.begin(), + RegsToSpill.end(), Reg) != RegsToSpill.end(); + } + + bool isSibling(unsigned Reg); + MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*); + void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = 0); + void analyzeSiblingValues(); + + bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI); + void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); + + void markValueUsed(LiveInterval*, VNInfo*); + bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI); + void reMaterializeAll(); + + bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); + bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >, + MachineInstr *LoadMI = 0); + void insertReload(LiveInterval &NewLI, SlotIndex, + MachineBasicBlock::iterator MI); + void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, + SlotIndex, MachineBasicBlock::iterator MI); + + void spillAroundUses(unsigned Reg); + void spillAll(); +}; +} + +namespace llvm { +Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) { + return new InlineSpiller(pass, mf, vrm); +} +} + +//===----------------------------------------------------------------------===// +// Snippets +//===----------------------------------------------------------------------===// + +// When spilling a virtual register, we also spill any snippets it is connected +// to. The snippets are small live ranges that only have a single real use, +// leftovers from live range splitting. Spilling them enables memory operand +// folding or tightens the live range around the single use. +// +// This minimizes register pressure and maximizes the store-to-load distance for +// spill slots which can be important in tight loops. + +/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, +/// otherwise return 0. +static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) { + if (!MI->isFullCopy()) + return 0; + if (MI->getOperand(0).getReg() == Reg) + return MI->getOperand(1).getReg(); + if (MI->getOperand(1).getReg() == Reg) + return MI->getOperand(0).getReg(); + return 0; +} + +/// isSnippet - Identify if a live interval is a snippet that should be spilled. +/// It is assumed that SnipLI is a virtual register with the same original as +/// Edit->getReg(). +bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { + unsigned Reg = Edit->getReg(); + + // A snippet is a tiny live range with only a single instruction using it + // besides copies to/from Reg or spills/fills. We accept: + // + // %snip = COPY %Reg / FILL fi# + // %snip = USE %snip + // %Reg = COPY %snip / SPILL %snip, fi# + // + if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI)) + return false; + + MachineInstr *UseMI = 0; + + // Check that all uses satisfy our criteria. 
+ for (MachineRegisterInfo::reg_nodbg_iterator + RI = MRI.reg_nodbg_begin(SnipLI.reg); + MachineInstr *MI = RI.skipInstruction();) { + + // Allow copies to/from Reg. + if (isFullCopyOf(MI, Reg)) + continue; + + // Allow stack slot loads. + int FI; + if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) + continue; + + // Allow stack slot stores. + if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) + continue; + + // Allow a single additional instruction. + if (UseMI && MI != UseMI) + return false; + UseMI = MI; + } + return true; +} + +/// collectRegsToSpill - Collect live range snippets that only have a single +/// real use. +void InlineSpiller::collectRegsToSpill() { + unsigned Reg = Edit->getReg(); + + // Main register always spills. + RegsToSpill.assign(1, Reg); + SnippetCopies.clear(); + + // Snippets all have the same original, so there can't be any for an original + // register. + if (Original == Reg) + return; + + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); + MachineInstr *MI = RI.skipInstruction();) { + unsigned SnipReg = isFullCopyOf(MI, Reg); + if (!isSibling(SnipReg)) + continue; + LiveInterval &SnipLI = LIS.getInterval(SnipReg); + if (!isSnippet(SnipLI)) + continue; + SnippetCopies.insert(MI); + if (isRegToSpill(SnipReg)) + continue; + RegsToSpill.push_back(SnipReg); + DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); + ++NumSnippets; + } +} + + +//===----------------------------------------------------------------------===// +// Sibling Values +//===----------------------------------------------------------------------===// + +// After live range splitting, some values to be spilled may be defined by +// copies from sibling registers. We trace the sibling copies back to the +// original value if it still exists. We need it for rematerialization. +// +// Even when the value can't be rematerialized, we still want to determine if +// the value has already been spilled, or we may want to hoist the spill from a +// loop. + +bool InlineSpiller::isSibling(unsigned Reg) { + return TargetRegisterInfo::isVirtualRegister(Reg) && + VRM.getOriginal(Reg) == Original; +} + +#ifndef NDEBUG +static raw_ostream &operator<<(raw_ostream &OS, + const InlineSpiller::SibValueInfo &SVI) { + OS << "spill " << PrintReg(SVI.SpillReg) << ':' + << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def; + if (SVI.SpillMBB) + OS << " in BB#" << SVI.SpillMBB->getNumber(); + if (SVI.AllDefsAreReloads) + OS << " all-reloads"; + if (SVI.DefByOrigPHI) + OS << " orig-phi"; + if (SVI.KillsSource) + OS << " kill"; + OS << " deps["; + for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i) + OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def; + OS << " ]"; + if (SVI.DefMI) + OS << " def: " << *SVI.DefMI; + else + OS << '\n'; + return OS; +} +#endif + +/// propagateSiblingValue - Propagate the value in SVI to dependents if it is +/// known. Otherwise remember the dependency for later. +/// +/// @param SVI SibValues entry to propagate. +/// @param VNI Dependent value, or NULL to propagate to all saved dependents. +void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, + VNInfo *VNI) { + // When VNI is non-NULL, add it to SVI's deps, and only propagate to that. + TinyPtrVector<VNInfo*> FirstDeps; + if (VNI) { + FirstDeps.push_back(VNI); + SVI->second.Deps.push_back(VNI); + } + + // Has the value been completely determined yet? If not, defer propagation. + if (!SVI->second.hasDef()) + return; + + // Work list of values to propagate. 
It would be nice to use a SetVector + // here, but then we would be forced to use a SmallSet. + SmallVector<SibValueMap::iterator, 8> WorkList(1, SVI); + SmallPtrSet<VNInfo*, 8> WorkSet; + + do { + SVI = WorkList.pop_back_val(); + WorkSet.erase(SVI->first); + TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps; + VNI = 0; + + SibValueInfo &SV = SVI->second; + if (!SV.SpillMBB) + SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def); + + DEBUG(dbgs() << " prop to " << Deps->size() << ": " + << SVI->first->id << '@' << SVI->first->def << ":\t" << SV); + + assert(SV.hasDef() && "Propagating undefined value"); + + // Should this value be propagated as a preferred spill candidate? We don't + // propagate values of registers that are about to spill. + bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg); + unsigned SpillDepth = ~0u; + + for (TinyPtrVector<VNInfo*>::iterator DepI = Deps->begin(), + DepE = Deps->end(); DepI != DepE; ++DepI) { + SibValueMap::iterator DepSVI = SibValues.find(*DepI); + assert(DepSVI != SibValues.end() && "Dependent value not in SibValues"); + SibValueInfo &DepSV = DepSVI->second; + if (!DepSV.SpillMBB) + DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def); + + bool Changed = false; + + // Propagate defining instruction. + if (!DepSV.hasDef()) { + Changed = true; + DepSV.DefMI = SV.DefMI; + DepSV.DefByOrigPHI = SV.DefByOrigPHI; + } + + // Propagate AllDefsAreReloads. For PHI values, this computes an AND of + // all predecessors. + if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) { + Changed = true; + DepSV.AllDefsAreReloads = false; + } + + // Propagate best spill value. + if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) { + if (SV.SpillMBB == DepSV.SpillMBB) { + // DepSV is in the same block. Hoist when dominated. + if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) { + // This is an alternative def earlier in the same MBB. + // Hoist the spill as far as possible in SpillMBB. This can ease + // register pressure: + // + // x = def + // y = use x + // s = copy x + // + // Hoisting the spill of s to immediately after the def removes the + // interference between x and y: + // + // x = def + // spill x + // y = use x<kill> + // + // This hoist only helps when the DepSV copy kills its source. + Changed = true; + DepSV.SpillReg = SV.SpillReg; + DepSV.SpillVNI = SV.SpillVNI; + DepSV.SpillMBB = SV.SpillMBB; + } + } else { + // DepSV is in a different block. + if (SpillDepth == ~0u) + SpillDepth = Loops.getLoopDepth(SV.SpillMBB); + + // Also hoist spills to blocks with smaller loop depth, but make sure + // that the new value dominates. Non-phi dependents are always + // dominated, phis need checking. + if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) && + (!DepSVI->first->isPHIDef() || + MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) { + Changed = true; + DepSV.SpillReg = SV.SpillReg; + DepSV.SpillVNI = SV.SpillVNI; + DepSV.SpillMBB = SV.SpillMBB; + } + } + } + + if (!Changed) + continue; + + // Something changed in DepSVI. Propagate to dependents. + if (WorkSet.insert(DepSVI->first)) + WorkList.push_back(DepSVI); + + DEBUG(dbgs() << " update " << DepSVI->first->id << '@' + << DepSVI->first->def << " to:\t" << DepSV); + } + } while (!WorkList.empty()); +} + +/// traceSiblingValue - Trace a value that is about to be spilled back to the +/// real defining instructions by looking through sibling copies. Always stay +/// within the range of OrigVNI so the registers are known to carry the same +/// value. 
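+/// A hedged example of the tracing (virtual register names invented):
+///     %vA = <real def>
+///     %vB = COPY %vA        ; sibling copy created by splitting
+///     %vC = COPY %vB        ; value about to be spilled
+/// Tracing %vC's value walks the copies back and returns %vA's defining
+/// instruction as the remat candidate.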
+///
+/// Determine if the value is defined by all reloads, so spilling isn't
+/// necessary - the value is already in the stack slot.
+///
+/// Return a defining instruction that may be a candidate for
+/// rematerialization.
+///
+MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
+                                               VNInfo *OrigVNI) {
+  // Check if a cached value already exists.
+  SibValueMap::iterator SVI;
+  bool Inserted;
+  tie(SVI, Inserted) =
+    SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
+  if (!Inserted) {
+    DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
+                 << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
+    return SVI->second.DefMI;
+  }
+
+  DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
+               << UseVNI->id << '@' << UseVNI->def << '\n');
+
+  // List of (Reg, VNI) that have been inserted into SibValues, but need to be
+  // processed.
+  SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
+  WorkList.push_back(std::make_pair(UseReg, UseVNI));
+
+  do {
+    unsigned Reg;
+    VNInfo *VNI;
+    tie(Reg, VNI) = WorkList.pop_back_val();
+    DEBUG(dbgs() << "  " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+                 << ":\t");
+
+    // First check if this value has already been computed.
+    SVI = SibValues.find(VNI);
+    assert(SVI != SibValues.end() && "Missing SibValues entry");
+
+    // Trace through PHI-defs created by live range splitting.
+    if (VNI->isPHIDef()) {
+      // Stop at original PHIs. We don't know the value at the predecessors.
+      if (VNI->def == OrigVNI->def) {
+        DEBUG(dbgs() << "orig phi value\n");
+        SVI->second.DefByOrigPHI = true;
+        SVI->second.AllDefsAreReloads = false;
+        propagateSiblingValue(SVI);
+        continue;
+      }
+
+      // This is a PHI inserted by live range splitting. We could trace the
+      // live-out value from predecessor blocks, but that search can be very
+      // expensive if there are many predecessors and many more PHIs as
+      // generated by tail-dup when it sees an indirectbr. Instead, look at
+      // all the non-PHI defs that have the same value as OrigVNI. They must
+      // jointly dominate VNI->def. This is not optimal since VNI may actually
+      // be jointly dominated by a smaller subset of defs, so there is a chance
+      // we will miss an AllDefsAreReloads optimization.
+
+      // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
+      SmallVector<VNInfo*, 8> PHIs, NonPHIs;
+      LiveInterval &LI = LIS.getInterval(Reg);
+      LiveInterval &OrigLI = LIS.getInterval(Original);
+
+      for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
+           VI != VE; ++VI) {
+        VNInfo *VNI2 = *VI;
+        if (VNI2->isUnused())
+          continue;
+        if (!OrigLI.containsOneValue() &&
+            OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
+          continue;
+        if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
+          PHIs.push_back(VNI2);
+        else
+          NonPHIs.push_back(VNI2);
+      }
+      DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
+                   << " phi-defs, and " << NonPHIs.size()
+                   << " non-phi/orig defs\n");
+
+      // Create entries for all the PHIs. Don't add them to the worklist, we
+      // are processing all of them in one go here.
+      for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+        SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i])));
+
+      // Add every PHI as a dependent of all the non-PHIs.
+      for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) {
+        VNInfo *NonPHI = NonPHIs[i];
+        // Known value? Try an insertion.
+        tie(SVI, Inserted) =
+          SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
+        // Add all the PHIs as dependents of NonPHI.
+ for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi) + SVI->second.Deps.push_back(PHIs[pi]); + // This is the first time we see NonPHI, add it to the worklist. + if (Inserted) + WorkList.push_back(std::make_pair(Reg, NonPHI)); + else + // Propagate to all inserted PHIs, not just VNI. + propagateSiblingValue(SVI); + } + + // Next work list item. + continue; + } + + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + + // Trace through sibling copies. + if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { + if (isSibling(SrcReg)) { + LiveInterval &SrcLI = LIS.getInterval(SrcReg); + LiveRangeQuery SrcQ(SrcLI, VNI->def); + assert(SrcQ.valueIn() && "Copy from non-existing value"); + // Check if this COPY kills its source. + SVI->second.KillsSource = SrcQ.isKill(); + VNInfo *SrcVNI = SrcQ.valueIn(); + DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':' + << SrcVNI->id << '@' << SrcVNI->def + << " kill=" << unsigned(SVI->second.KillsSource) << '\n'); + // Known sibling source value? Try an insertion. + tie(SVI, Inserted) = SibValues.insert(std::make_pair(SrcVNI, + SibValueInfo(SrcReg, SrcVNI))); + // This is the first time we see Src, add it to the worklist. + if (Inserted) + WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); + propagateSiblingValue(SVI, VNI); + // Next work list item. + continue; + } + } + + // Track reachable reloads. + SVI->second.DefMI = MI; + SVI->second.SpillMBB = MI->getParent(); + int FI; + if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << "reload\n"); + propagateSiblingValue(SVI); + // Next work list item. + continue; + } + + // Potential remat candidate. + DEBUG(dbgs() << "def " << *MI); + SVI->second.AllDefsAreReloads = false; + propagateSiblingValue(SVI); + } while (!WorkList.empty()); + + // Look up the value we were looking for. We already did this lookup at the + // top of the function, but SibValues may have been invalidated. + SVI = SibValues.find(UseVNI); + assert(SVI != SibValues.end() && "Didn't compute requested info"); + DEBUG(dbgs() << " traced to:\t" << SVI->second); + return SVI->second.DefMI; +} + +/// analyzeSiblingValues - Trace values defined by sibling copies back to +/// something that isn't a sibling copy. +/// +/// Keep track of values that may be rematerializable. +void InlineSpiller::analyzeSiblingValues() { + SibValues.clear(); + + // No siblings at all? + if (Edit->getReg() == Original) + return; + + LiveInterval &OrigLI = LIS.getInterval(Original); + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + LiveInterval &LI = LIS.getInterval(Reg); + for (LiveInterval::const_vni_iterator VI = LI.vni_begin(), + VE = LI.vni_end(); VI != VE; ++VI) { + VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; + MachineInstr *DefMI = 0; + if (!VNI->isPHIDef()) { + DefMI = LIS.getInstructionFromIndex(VNI->def); + assert(DefMI && "No defining instruction"); + } + // Check possible sibling copies. + if (VNI->isPHIDef() || DefMI->isCopy()) { + VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + assert(OrigVNI && "Def outside original live range"); + if (OrigVNI->def != VNI->def) + DefMI = traceSiblingValue(Reg, VNI, OrigVNI); + } + if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) { + DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' + << VNI->def << " may remat from " << *DefMI); + } + } + } +} + +/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert +/// a spill at a better location. 
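+/// A sketch of the intended payoff (assumed loop nest): when the value is
+/// defined outside a loop but the sibling copy sits inside it, the store is
+/// emitted right after the defining instruction instead, so it executes once
+/// rather than on every iteration.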
+bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { + SlotIndex Idx = LIS.getInstructionIndex(CopyMI); + VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); + assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); + SibValueMap::iterator I = SibValues.find(VNI); + if (I == SibValues.end()) + return false; + + const SibValueInfo &SVI = I->second; + + // Let the normal folding code deal with the boring case. + if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI) + return false; + + // SpillReg may have been deleted by remat and DCE. + if (!LIS.hasInterval(SVI.SpillReg)) { + DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg); + if (!SibLI.containsValue(SVI.SpillVNI)) { + DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + // Conservatively extend the stack slot range to the range of the original + // value. We may be able to do better with stack slot coloring by being more + // careful here. + assert(StackInt && "No stack slot assigned yet."); + LiveInterval &OrigLI = LIS.getInterval(Original); + VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx); + StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " + << *StackInt << '\n'); + + // Already spilled everywhere. + if (SVI.AllDefsAreReloads) { + DEBUG(dbgs() << "\tno spill needed: " << SVI); + ++NumOmitReloadSpill; + return true; + } + // We are going to spill SVI.SpillVNI immediately after its def, so clear out + // any later spills of the same value. + eliminateRedundantSpills(SibLI, SVI.SpillVNI); + + MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def); + MachineBasicBlock::iterator MII; + if (SVI.SpillVNI->isPHIDef()) + MII = MBB->SkipPHIsAndLabels(MBB->begin()); + else { + MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def); + assert(DefMI && "Defining instruction disappeared"); + MII = DefMI; + ++MII; + } + // Insert spill without kill flag immediately after def. + TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot, + MRI.getRegClass(SVI.SpillReg), &TRI); + --MII; // Point to store instruction. + LIS.InsertMachineInstrInMaps(MII); + DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); + + ++NumSpills; + ++NumHoists; + return true; +} + +/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any +/// redundant spills of this value in SLI.reg and sibling copies. +void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { + assert(VNI && "Missing value"); + SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(&SLI, VNI)); + assert(StackInt && "No stack slot assigned yet."); + + do { + LiveInterval *LI; + tie(LI, VNI) = WorkList.pop_back_val(); + unsigned Reg = LI->reg; + DEBUG(dbgs() << "Checking redundant spills for " + << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); + + // Regs to spill are taken care of. + if (isRegToSpill(Reg)) + continue; + + // Add all of VNI's live range to StackInt. + StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); + + // Find all spills and copies of VNI. 
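+    // Illustrative case (invented registers and slot): given
+    //     SPILL %vA, fi#3
+    //     ...
+    //     SPILL %vB, fi#3      ; %vB carries the same value as %vA
+    // the second store is redundant; the loop below rewrites it to a KILL
+    // and queues it in DeadDefs for later cleanup.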
+ for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg); + MachineInstr *MI = UI.skipInstruction();) { + if (!MI->isCopy() && !MI->mayStore()) + continue; + SlotIndex Idx = LIS.getInstructionIndex(MI); + if (LI->getVNInfoAt(Idx) != VNI) + continue; + + // Follow sibling copies down the dominator tree. + if (unsigned DstReg = isFullCopyOf(MI, Reg)) { + if (isSibling(DstReg)) { + LiveInterval &DstLI = LIS.getInterval(DstReg); + VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot()); + assert(DstVNI && "Missing defined value"); + assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot"); + WorkList.push_back(std::make_pair(&DstLI, DstVNI)); + } + continue; + } + + // Erase spills. + int FI; + if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI); + // eliminateDeadDefs won't normally remove stores, so switch opcode. + MI->setDesc(TII.get(TargetOpcode::KILL)); + DeadDefs.push_back(MI); + ++NumSpillsRemoved; + --NumSpills; + } + } + } while (!WorkList.empty()); +} + + +//===----------------------------------------------------------------------===// +// Rematerialization +//===----------------------------------------------------------------------===// + +/// markValueUsed - Remember that VNI failed to rematerialize, so its defining +/// instruction cannot be eliminated. See through snippet copies +void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { + SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(LI, VNI)); + do { + tie(LI, VNI) = WorkList.pop_back_val(); + if (!UsedValues.insert(VNI)) + continue; + + if (VNI->isPHIDef()) { + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)); + if (PVNI) + WorkList.push_back(std::make_pair(LI, PVNI)); + } + continue; + } + + // Follow snippet copies. + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + if (!SnippetCopies.count(MI)) + continue; + LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); + assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); + VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true)); + assert(SnipVNI && "Snippet undefined before copy"); + WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); + } while (!WorkList.empty()); +} + +/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. +bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, + MachineBasicBlock::iterator MI) { + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true); + VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); + + if (!ParentVNI) { + DEBUG(dbgs() << "\tadding <undef> flags: "); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) + MO.setIsUndef(); + } + DEBUG(dbgs() << UseIdx << '\t' << *MI); + return true; + } + + if (SnippetCopies.count(MI)) + return false; + + // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy. 
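+ // If ParentVNI is only defined by a COPY from a sibling register, the
+ // instruction that actually computes the value (recorded as DefMI by
+ // traceSiblingValue) is the interesting remat candidate, not the COPY
+ // itself.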
+ LiveRangeEdit::Remat RM(ParentVNI);
+ SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
+ if (SibI != SibValues.end())
+ RM.OrigMI = SibI->second.DefMI;
+ if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // If the instruction also writes VirtReg.reg, it had better not require the
+ // same register for uses and defs.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::VirtRegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+ if (RI.Tied) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
+ if (RM.OrigMI->canFoldAsLoad() &&
+ foldMemoryOperand(Ops, RM.OrigMI)) {
+ Edit->markRematerialized(RM.ParentVNI);
+ ++NumFoldedLoads;
+ return true;
+ }
+
+ // Allocate a new register for the remat.
+ LiveInterval &NewLI = Edit->createFrom(Original);
+ NewLI.markNotSpillable();
+
+ // Finally we can rematerialize OrigMI before MI.
+ SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+ TRI);
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *LIS.getInstructionFromIndex(DefIdx));
+
+ // Replace operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i].second);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
+ MO.setReg(NewLI.reg);
+ MO.setIsKill();
+ }
+ }
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
+
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI));
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ ++NumRemats;
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ // analyzeSiblingValues has already tested all relevant defining instructions.
+ if (!Edit->anyRematerializable(AA))
+ return;
+
+ UsedValues.clear();
+
+ // Try to remat before all uses of snippets.
+ bool anyRemat = false;
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ RI = MRI.use_nodbg_begin(Reg);
+ MachineInstr *MI = RI.skipBundle();)
+ anyRemat |= reMaterializeFor(LI, MI);
+ }
+ if (!anyRemat)
+ return;
+
+ // Remove any values that were completely rematted.
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ MI->addRegisterDead(Reg, &TRI);
+ if (!MI->allDefsAreDead())
+ continue;
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ DeadDefs.push_back(MI);
+ }
+ }
+
+ // Eliminate dead code after remat. Note that some snippet copies may be
+ // deleted here.
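+ // The pruning loop below walks RegsToSpill backwards so that erase()
+ // doesn't disturb the indices still to be visited; e.g. with {a, b, c},
+ // removing b while scanning from the end still visits a afterwards.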
+ if (DeadDefs.empty()) + return; + DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); + + // Get rid of deleted and empty intervals. + for (unsigned i = RegsToSpill.size(); i != 0; --i) { + unsigned Reg = RegsToSpill[i-1]; + if (!LIS.hasInterval(Reg)) { + RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); + continue; + } + LiveInterval &LI = LIS.getInterval(Reg); + if (!LI.empty()) + continue; + Edit->eraseVirtReg(Reg); + RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); + } + DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); +} + + +//===----------------------------------------------------------------------===// +// Spilling +//===----------------------------------------------------------------------===// + +/// If MI is a load or store of StackSlot, it can be removed. +bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { + int FI = 0; + unsigned InstrReg = TII.isLoadFromStackSlot(MI, FI); + bool IsLoad = InstrReg; + if (!IsLoad) + InstrReg = TII.isStoreToStackSlot(MI, FI); + + // We have a stack access. Is it the right register and slot? + if (InstrReg != Reg || FI != StackSlot) + return false; + + DEBUG(dbgs() << "Coalescing stack access: " << *MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + + if (IsLoad) { + ++NumReloadsRemoved; + --NumReloads; + } else { + ++NumSpillsRemoved; + --NumSpills; + } + + return true; +} + +/// foldMemoryOperand - Try folding stack slot references in Ops into their +/// instructions. +/// +/// @param Ops Operand indices from analyzeVirtReg(). +/// @param LoadMI Load instruction to use instead of stack slot when non-null. +/// @return True on success. +bool InlineSpiller:: +foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, + MachineInstr *LoadMI) { + if (Ops.empty()) + return false; + // Don't attempt folding in bundles. + MachineInstr *MI = Ops.front().first; + if (Ops.back().first != MI || MI->isBundled()) + return false; + + bool WasCopy = MI->isCopy(); + unsigned ImpReg = 0; + + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied + // operands. + SmallVector<unsigned, 8> FoldOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + unsigned Idx = Ops[i].second; + MachineOperand &MO = MI->getOperand(Idx); + if (MO.isImplicit()) { + ImpReg = MO.getReg(); + continue; + } + // FIXME: Teach targets to deal with subregs. + if (MO.getSubReg()) + return false; + // We cannot fold a load instruction into a def. + if (LoadMI && MO.isDef()) + return false; + // Tied use operands should not be passed to foldMemoryOperand. + if (!MI->isRegTiedToDefOperand(Idx)) + FoldOps.push_back(Idx); + } + + MachineInstr *FoldMI = + LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) + : TII.foldMemoryOperand(MI, FoldOps, StackSlot); + if (!FoldMI) + return false; + LIS.ReplaceMachineInstrInMaps(MI, FoldMI); + MI->eraseFromParent(); + + // TII.foldMemoryOperand may have left some implicit operands on the + // instruction. Strip them. 
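+ // Implicit operands live at the tail of the operand list, so the loop
+ // below scans from the back and stops at the first explicit operand;
+ // only implicit operands of ImpReg itself are dropped.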
+ if (ImpReg) + for (unsigned i = FoldMI->getNumOperands(); i; --i) { + MachineOperand &MO = FoldMI->getOperand(i - 1); + if (!MO.isReg() || !MO.isImplicit()) + break; + if (MO.getReg() == ImpReg) + FoldMI->RemoveOperand(i - 1); + } + + DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' + << *FoldMI); + if (!WasCopy) + ++NumFolded; + else if (Ops.front().second == 0) + ++NumSpills; + else + ++NumReloads; + return true; +} + +/// insertReload - Insert a reload of NewLI.reg before MI. +void InlineSpiller::insertReload(LiveInterval &NewLI, + SlotIndex Idx, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, + MRI.getRegClass(NewLI.reg), &TRI); + --MI; // Point to load instruction. + SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); + // Some (out-of-tree) targets have EC reload instructions. + if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg)) + if (MO->isEarlyClobber()) + LoadIdx = LoadIdx.getRegSlot(true); + DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); + NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); + ++NumReloads; +} + +/// insertSpill - Insert a spill of NewLI.reg after MI. +void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, + SlotIndex Idx, MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, + MRI.getRegClass(NewLI.reg), &TRI); + --MI; // Point to store instruction. + SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); + DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); + ++NumSpills; +} + +/// spillAroundUses - insert spill code around each use of Reg. +void InlineSpiller::spillAroundUses(unsigned Reg) { + DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n'); + LiveInterval &OldLI = LIS.getInterval(Reg); + + // Iterate over instructions using Reg. + for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg); + MachineInstr *MI = RegI.skipBundle();) { + + // Debug values are not allowed to affect codegen. + if (MI->isDebugValue()) { + // Modify DBG_VALUE now that the value is in a spill slot. + uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot, + Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + } else { + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + MI->eraseFromParent(); + } + continue; + } + + // Ignore copies to/from snippets. We'll delete them. + if (SnippetCopies.count(MI)) + continue; + + // Stack slot accesses may coalesce away. + if (coalesceStackAccess(MI, Reg)) + continue; + + // Analyze instruction. + SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; + MIBundleOperands::VirtRegInfo RI = + MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops); + + // Find the slot index where this instruction reads and writes OldLI. + // This is usually the def slot, except for tied early clobbers. 
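+ // In other words: start from the normal def slot, and if the value live
+ // at the early-clobber slot of this same instruction is defined here,
+ // move Idx back to that def.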
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true))) + if (SlotIndex::isSameInstr(Idx, VNI->def)) + Idx = VNI->def; + + // Check for a sibling copy. + unsigned SibReg = isFullCopyOf(MI, Reg); + if (SibReg && isSibling(SibReg)) { + // This may actually be a copy between snippets. + if (isRegToSpill(SibReg)) { + DEBUG(dbgs() << "Found new snippet copy: " << *MI); + SnippetCopies.insert(MI); + continue; + } + if (RI.Writes) { + // Hoist the spill of a sib-reg copy. + if (hoistSpill(OldLI, MI)) { + // This COPY is now dead, the value is already in the stack slot. + MI->getOperand(0).setIsDead(); + DeadDefs.push_back(MI); + continue; + } + } else { + // This is a reload for a sib-reg copy. Drop spills downstream. + LiveInterval &SibLI = LIS.getInterval(SibReg); + eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx)); + // The COPY will fold to a reload below. + } + } + + // Attempt to fold memory ops. + if (foldMemoryOperand(Ops)) + continue; + + // Allocate interval around instruction. + // FIXME: Infer regclass from instruction alone. + LiveInterval &NewLI = Edit->createFrom(Reg); + NewLI.markNotSpillable(); + + if (RI.Reads) + insertReload(NewLI, Idx, MI); + + // Rewrite instruction operands. + bool hasLiveDef = false; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); + MO.setReg(NewLI.reg); + if (MO.isUse()) { + if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) + MO.setIsKill(); + } else { + if (!MO.isDead()) + hasLiveDef = true; + } + } + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); + + // FIXME: Use a second vreg if instruction has no tied ops. + if (RI.Writes) { + if (hasLiveDef) + insertSpill(NewLI, OldLI, Idx, MI); + else { + // This instruction defines a dead value. We don't need to spill it, + // but do create a live range for the dead value. + VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI)); + } + } + + DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + } +} + +/// spillAll - Spill all registers remaining after rematerialization. +void InlineSpiller::spillAll() { + // Update LiveStacks now that we are committed to spilling. + if (StackSlot == VirtRegMap::NO_STACK_SLOT) { + StackSlot = VRM.assignVirt2StackSlot(Original); + StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original)); + StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator()); + } else + StackInt = &LSS.getInterval(StackSlot); + + if (Original != Edit->getReg()) + VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot); + + assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]), + StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); + + // Spill around uses of all RegsToSpill. + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + spillAroundUses(RegsToSpill[i]); + + // Hoisted spills may cause dead code. + if (!DeadDefs.empty()) { + DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); + } + + // Finally delete the SnippetCopies. 
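+ // By this point every remaining use of a spilled register must be one of
+ // the snippet COPYs collected earlier; the assert in the loop below
+ // enforces that before the instructions are unmapped and erased.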
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]); + MachineInstr *MI = RI.skipInstruction();) { + assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); + // FIXME: Do this with a LiveRangeEdit callback. + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + } + + // Delete all spilled registers. + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + Edit->eraseVirtReg(RegsToSpill[i]); +} + +void InlineSpiller::spill(LiveRangeEdit &edit) { + ++NumSpilledRanges; + Edit = &edit; + assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) + && "Trying to spill a stack slot."); + // Share a stack slot among all descendants of Original. + Original = VRM.getOriginal(edit.getReg()); + StackSlot = VRM.getStackSlot(Original); + StackInt = 0; + + DEBUG(dbgs() << "Inline spilling " + << MRI.getRegClass(edit.getReg())->getName() + << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent() + << "\nFrom original " << LIS.getInterval(Original) << '\n'); + assert(edit.getParent().isSpillable() && + "Attempting to spill already spilled value."); + assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); + + collectRegsToSpill(); + analyzeSiblingValues(); + reMaterializeAll(); + + // Remat may handle everything. + if (!RegsToSpill.empty()) + spillAll(); + + Edit->calculateRegClassAndHint(MF, Loops); +} diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp new file mode 100644 index 0000000..a8e711e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -0,0 +1,231 @@ +//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InterferenceCache remembers per-block interference in LiveIntervalUnions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "InterferenceCache.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +// Static member used for null interference cursors. +InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; + +void InterferenceCache::init(MachineFunction *mf, + LiveIntervalUnion *liuarray, + SlotIndexes *indexes, + LiveIntervals *lis, + const TargetRegisterInfo *tri) { + MF = mf; + LIUArray = liuarray; + TRI = tri; + PhysRegEntries.assign(TRI->getNumRegs(), 0); + for (unsigned i = 0; i != CacheEntries; ++i) + Entries[i].clear(mf, indexes, lis); +} + +InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { + unsigned E = PhysRegEntries[PhysReg]; + if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) { + if (!Entries[E].valid(LIUArray, TRI)) + Entries[E].revalidate(LIUArray, TRI); + return &Entries[E]; + } + // No valid entry exists, pick the next round-robin entry. + E = RoundRobin; + if (++RoundRobin == CacheEntries) + RoundRobin = 0; + for (unsigned i = 0; i != CacheEntries; ++i) { + // Skip entries that are in use. 
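+ // E.g. with CacheEntries == 32 and RoundRobin at 7: start at Entries[7],
+ // advance with wrap-around past any entry still pinned by a live Cursor
+ // (hasRefs()), and reset the first free entry found for PhysReg.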
+ if (Entries[E].hasRefs()) { + if (++E == CacheEntries) + E = 0; + continue; + } + Entries[E].reset(PhysReg, LIUArray, TRI, MF); + PhysRegEntries[PhysReg] = E; + return &Entries[E]; + } + llvm_unreachable("Ran out of interference cache entries."); +} + +/// revalidate - LIU contents have changed, update tags. +void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI) { + // Invalidate all block entries. + ++Tag; + // Invalidate all iterators. + PrevPos = SlotIndex(); + unsigned i = 0; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) + RegUnits[i].VirtTag = LIUArray[*Units].getTag(); +} + +void InterferenceCache::Entry::reset(unsigned physReg, + LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI, + const MachineFunction *MF) { + assert(!hasRefs() && "Cannot reset cache entry with references"); + // LIU's changed, invalidate cache. + ++Tag; + PhysReg = physReg; + Blocks.resize(MF->getNumBlockIDs()); + + // Reset iterators. + PrevPos = SlotIndex(); + RegUnits.clear(); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + RegUnits.push_back(LIUArray[*Units]); + RegUnits.back().Fixed = &LIS->getRegUnit(*Units); + } +} + +bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI) { + unsigned i = 0, e = RegUnits.size(); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) { + if (i == e) + return false; + if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag)) + return false; + } + return i == e; +} + +void InterferenceCache::Entry::update(unsigned MBBNum) { + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + + // Use advanceTo only when possible. + if (PrevPos != Start) { + if (!PrevPos.isValid() || Start < PrevPos) { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + RegUnitInfo &RUI = RegUnits[i]; + RUI.VirtI.find(Start); + RUI.FixedI = RUI.Fixed->find(Start); + } + } else { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + RegUnitInfo &RUI = RegUnits[i]; + RUI.VirtI.advanceTo(Start); + if (RUI.FixedI != RUI.Fixed->end()) + RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start); + } + } + PrevPos = Start; + } + + MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); + BlockInterference *BI = &Blocks[MBBNum]; + ArrayRef<SlotIndex> RegMaskSlots; + ArrayRef<const uint32_t*> RegMaskBits; + for (;;) { + BI->Tag = Tag; + BI->First = BI->Last = SlotIndex(); + + // Check for first interference from virtregs. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; + if (!I.valid()) + continue; + SlotIndex StartI = I.start(); + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } + + // Same thing for fixed interference. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveInterval::const_iterator I = RegUnits[i].FixedI; + LiveInterval::const_iterator E = RegUnits[i].Fixed->end(); + if (I == E) + continue; + SlotIndex StartI = I->start; + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } + + // Also check for register mask interference. + RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum); + RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum); + SlotIndex Limit = BI->First.isValid() ? 
BI->First : Stop; + for (unsigned i = 0, e = RegMaskSlots.size(); + i != e && RegMaskSlots[i] < Limit; ++i) + if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) { + // Register mask i clobbers PhysReg before the LIU interference. + BI->First = RegMaskSlots[i]; + break; + } + + PrevPos = Stop; + if (BI->First.isValid()) + break; + + // No interference in this block? Go ahead and precompute the next block. + if (++MFI == MF->end()) + return; + MBBNum = MFI->getNumber(); + BI = &Blocks[MBBNum]; + if (BI->Tag == Tag) + return; + tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + } + + // Check for last interference in block. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; + if (!I.valid() || I.start() >= Stop) + continue; + I.advanceTo(Stop); + bool Backup = !I.valid() || I.start() >= Stop; + if (Backup) + --I; + SlotIndex StopI = I.stop(); + if (!BI->Last.isValid() || StopI > BI->Last) + BI->Last = StopI; + if (Backup) + ++I; + } + + // Fixed interference. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveInterval::iterator &I = RegUnits[i].FixedI; + LiveInterval *LI = RegUnits[i].Fixed; + if (I == LI->end() || I->start >= Stop) + continue; + I = LI->advanceTo(I, Stop); + bool Backup = I == LI->end() || I->start >= Stop; + if (Backup) + --I; + SlotIndex StopI = I->end; + if (!BI->Last.isValid() || StopI > BI->Last) + BI->Last = StopI; + if (Backup) + ++I; + } + + // Also check for register mask interference. + SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start; + for (unsigned i = RegMaskSlots.size(); + i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i) + if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) { + // Register mask i-1 clobbers PhysReg after the LIU interference. + // Model the regmask clobber as a dead def. + BI->Last = RegMaskSlots[i-1].getDeadSlot(); + break; + } +} diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h new file mode 100644 index 0000000..c02fb9a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h @@ -0,0 +1,228 @@ +//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InterferenceCache remembers per-block interference from LiveIntervalUnions, +// fixed RegUnit interference, and register masks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_INTERFERENCECACHE +#define LLVM_CODEGEN_INTERFERENCECACHE + +#include "llvm/CodeGen/LiveIntervalUnion.h" + +namespace llvm { + +class LiveIntervals; + +class InterferenceCache { + const TargetRegisterInfo *TRI; + LiveIntervalUnion *LIUArray; + MachineFunction *MF; + + /// BlockInterference - information about the interference in a single basic + /// block. + struct BlockInterference { + BlockInterference() : Tag(0) {} + unsigned Tag; + SlotIndex First; + SlotIndex Last; + }; + + /// Entry - A cache entry containing interference information for all aliases + /// of PhysReg in all basic blocks. + class Entry { + /// PhysReg - The register currently represented. + unsigned PhysReg; + + /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions + /// change. 
+ unsigned Tag;
+
+ /// RefCount - The total number of Cursor instances referring to this Entry.
+ unsigned RefCount;
+
+ /// MF - The current function.
+ MachineFunction *MF;
+
+ /// Indexes - Mapping block numbers to SlotIndex ranges.
+ SlotIndexes *Indexes;
+
+ /// LIS - Used for accessing register mask interference maps.
+ LiveIntervals *LIS;
+
+ /// PrevPos - The previous position the iterators were moved to.
+ SlotIndex PrevPos;
+
+ /// RegUnitInfo - Information tracked about each RegUnit in PhysReg.
+ /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos)
+ /// had just been called.
+ struct RegUnitInfo {
+ /// Iterator pointing into the LiveIntervalUnion containing virtual
+ /// register interference.
+ LiveIntervalUnion::SegmentIter VirtI;
+
+ /// Tag of the LIU last time we looked.
+ unsigned VirtTag;
+
+ /// Fixed interference in RegUnit.
+ LiveInterval *Fixed;
+
+ /// Iterator pointing into the fixed RegUnit interference.
+ LiveInterval::iterator FixedI;
+
+ RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) {
+ VirtI.setMap(LIU.getMap());
+ }
+ };
+
+ /// Info for each RegUnit in PhysReg. It is very rare for a PhysReg to have
+ /// more than 4 RegUnits.
+ SmallVector<RegUnitInfo, 4> RegUnits;
+
+ /// Blocks - Interference for each block in the function.
+ SmallVector<BlockInterference, 8> Blocks;
+
+ /// update - Recompute Blocks[MBBNum].
+ void update(unsigned MBBNum);
+
+ public:
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {}
+
+ void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
+ assert(!hasRefs() && "Cannot clear cache entry with references");
+ PhysReg = 0;
+ MF = mf;
+ Indexes = indexes;
+ LIS = lis;
+ }
+
+ unsigned getPhysReg() const { return PhysReg; }
+
+ void addRef(int Delta) { RefCount += Delta; }
+
+ bool hasRefs() const { return RefCount > 0; }
+
+ void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// valid - Return true if this is a valid entry for physReg.
+ bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// reset - Initialize entry to represent physReg's aliases.
+ void reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF);
+
+ /// get - Return an up-to-date BlockInterference.
+ BlockInterference *get(unsigned MBBNum) {
+ if (Blocks[MBBNum].Tag != Tag)
+ update(MBBNum);
+ return &Blocks[MBBNum];
+ }
+ };
+
+ // We don't keep a cache entry for every physical register; that would use
+ // too much memory. Instead, a fixed number of cache entries are used in a
+ // round-robin manner.
+ enum { CacheEntries = 32 };
+
+ // Point to an entry for each physreg. The entry pointed to may not be up to
+ // date, and it may have been reused for a different physreg.
+ SmallVector<unsigned char, 2> PhysRegEntries;
+
+ // Next round-robin entry to be picked.
+ unsigned RoundRobin;
+
+ // The actual cache entries.
+ Entry Entries[CacheEntries];
+
+ // get - Get a valid entry for PhysReg.
+ Entry *get(unsigned PhysReg);
+
+public:
+ InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {}
+
+ /// init - Prepare cache for a new function.
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
+ const TargetRegisterInfo *);
+
+ /// getMaxCursors - Return the maximum number of concurrent cursors that can
+ /// be supported.
+ unsigned getMaxCursors() const { return CacheEntries; }
+
+ /// Cursor - The primary query interface for the block interference cache.
+ class Cursor {
+ Entry *CacheEntry;
+ BlockInterference *Current;
+ static BlockInterference NoInterference;
+
+ void setEntry(Entry *E) {
+ Current = 0;
+ // Update reference counts. Nothing happens when RefCount reaches 0, so
+ // we don't have to check for E == CacheEntry etc.
+ if (CacheEntry)
+ CacheEntry->addRef(-1);
+ CacheEntry = E;
+ if (CacheEntry)
+ CacheEntry->addRef(+1);
+ }
+
+ public:
+ /// Cursor - Create a dangling cursor.
+ Cursor() : CacheEntry(0), Current(0) {}
+ ~Cursor() { setEntry(0); }
+
+ Cursor(const Cursor &O) : CacheEntry(0), Current(0) {
+ setEntry(O.CacheEntry);
+ }
+
+ Cursor &operator=(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ return *this;
+ }
+
+ /// setPhysReg - Point this cursor to PhysReg's interference.
+ void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+ // Release reference before getting a new one. That guarantees we can
+ // actually have CacheEntries live cursors.
+ setEntry(0);
+ if (PhysReg)
+ setEntry(Cache.get(PhysReg));
+ }
+
+ /// moveToBlock - Move cursor to basic block MBBNum.
+ void moveToBlock(unsigned MBBNum) {
+ Current = CacheEntry ? CacheEntry->get(MBBNum) : &NoInterference;
+ }
+
+ /// hasInterference - Return true if the current block has any interference.
+ bool hasInterference() {
+ return Current->First.isValid();
+ }
+
+ /// first - Return the starting index of the first interfering range in the
+ /// current block.
+ SlotIndex first() {
+ return Current->First;
+ }
+
+ /// last - Return the ending index of the last interfering range in the
+ /// current block.
+ SlotIndex last() {
+ return Current->Last;
+ }
+ };
+
+ friend class Cursor;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 0000000..07f0ccf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,565 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+ const char *FName,
+ const char *DName, const char *LDName) {
+ // Insert definitions for all the floating point types.
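+ // For example, if Fn takes a float, an FName prototype ("sqrtf" for
+ // llvm.sqrt) is inserted; a double selects DName ("sqrt"), and the
+ // extended types (x86_fp80, fp128, ppc_fp128) all select LDName ("sqrtl"),
+ // matching the calls emitted in LowerIntrinsicCall below.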
+ switch((int)Fn->arg_begin()->getType()->getTypeID()) { + case Type::FloatTyID: + EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(), + Type::getFloatTy(M.getContext())); + break; + case Type::DoubleTyID: + EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(), + Type::getDoubleTy(M.getContext())); + break; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(), + Fn->arg_begin()->getType()); + break; + } +} + +/// ReplaceCallWith - This function is used when we want to lower an intrinsic +/// call to a call of an external function. This handles hard cases such as +/// when there was already a prototype for the external function, and if that +/// prototype doesn't match the arguments we expect to pass in. +template <class ArgIt> +static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, + ArgIt ArgBegin, ArgIt ArgEnd, + Type *RetTy) { + // If we haven't already looked up this function, check to see if the + // program already contains a function with this name. + Module *M = CI->getParent()->getParent()->getParent(); + // Get or insert the definition now. + std::vector<Type *> ParamTys; + for (ArgIt I = ArgBegin; I != ArgEnd; ++I) + ParamTys.push_back((*I)->getType()); + Constant* FCache = M->getOrInsertFunction(NewFn, + FunctionType::get(RetTy, ParamTys, false)); + + IRBuilder<> Builder(CI->getParent(), CI); + SmallVector<Value *, 8> Args(ArgBegin, ArgEnd); + CallInst *NewCI = Builder.CreateCall(FCache, Args); + NewCI->setName(CI->getName()); + if (!CI->use_empty()) + CI->replaceAllUsesWith(NewCI); + return NewCI; +} + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc +#endif + +void IntrinsicLowering::AddPrototypes(Module &M) { + LLVMContext &Context = M.getContext(); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->isDeclaration() && !I->use_empty()) + switch (I->getIntrinsicID()) { + default: break; + case Intrinsic::setjmp: + EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(), + Type::getInt32Ty(M.getContext())); + break; + case Intrinsic::longjmp: + EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(), + Type::getVoidTy(M.getContext())); + break; + case Intrinsic::siglongjmp: + EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(), + Type::getVoidTy(M.getContext())); + break; + case Intrinsic::memcpy: + M.getOrInsertFunction("memcpy", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + TD.getIntPtrType(Context), (Type *)0); + break; + case Intrinsic::memmove: + M.getOrInsertFunction("memmove", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + TD.getIntPtrType(Context), (Type *)0); + break; + case Intrinsic::memset: + M.getOrInsertFunction("memset", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt32Ty(M.getContext()), + TD.getIntPtrType(Context), (Type *)0); + break; + case Intrinsic::sqrt: + EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); + break; + case Intrinsic::sin: + EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl"); + break; + case Intrinsic::cos: + EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl"); + break; + case Intrinsic::pow: + EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl"); + break; + case Intrinsic::log: + 
EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl"); + break; + case Intrinsic::log2: + EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l"); + break; + case Intrinsic::log10: + EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l"); + break; + case Intrinsic::exp: + EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl"); + break; + case Intrinsic::exp2: + EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l"); + break; + } +} + +/// LowerBSWAP - Emit the code to lower bswap of V before the specified +/// instruction IP. +static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { + assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + + IRBuilder<> Builder(IP->getParent(), IP); + + switch(BitSize) { + default: llvm_unreachable("Unhandled type size of value to byteswap!"); + case 16: { + Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.1"); + V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16"); + break; + } + case 32: { + Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.4"); + Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.3"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24), + "bswap.1"); + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF00), + "bswap.and2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); + V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32"); + break; + } + case 64: { + Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56), + "bswap.8"); + Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40), + "bswap.7"); + Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.6"); + Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.5"); + Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.4"); + Value* Tmp3 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 24), + "bswap.3"); + Value* Tmp2 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 40), + "bswap.2"); + Value* Tmp1 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 56), + "bswap.1"); + Tmp7 = Builder.CreateAnd(Tmp7, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000000000ULL), + "bswap.and7"); + Tmp6 = Builder.CreateAnd(Tmp6, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000000000ULL), + "bswap.and6"); + Tmp5 = Builder.CreateAnd(Tmp5, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00000000ULL), + "bswap.and5"); + Tmp4 = Builder.CreateAnd(Tmp4, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000ULL), + "bswap.and4"); + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000ULL), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00ULL), + "bswap.and2"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); + Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5"); + Tmp4 = 
Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+ Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+ Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
+ IRBuilder<> Builder(IP->getParent(), IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(Context, V, IP);
+}
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+ const char *Dname,
+ const char *LDname) {
+ CallSite CS(CI);
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid type in intrinsic");
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getFloatTy(CI->getContext()));
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getDoubleTy(CI->getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
+ CI->getArgOperand(0)->getType());
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+ LLVMContext &Context = CI->getContext();
+
+ const Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ CallSite CS(CI);
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
+ Callee->getName() + "'!");
+ default:
+ report_fatal_error("Code generator does not support intrinsic function '"+
+ Callee->getName()+"'!");
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
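+ // E.g. %v = call i64 @llvm.expect.i64(i64 %x, i64 1) is replaced by a
+ // plain use of %x; the expected-value hint is simply dropped.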
+ Value *V = CI->getArgOperand(0); + CI->replaceAllUsesWith(V); + break; + } + + // The setjmp/longjmp intrinsics should only exist in the code if it was + // never optimized (ie, right out of the CFE), or if it has been hacked on + // by the lowerinvoke pass. In both cases, the right thing to do is to + // convert the call to an explicit setjmp or longjmp call. + case Intrinsic::setjmp: { + Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(), + Type::getInt32Ty(Context)); + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(V); + break; + } + case Intrinsic::sigsetjmp: + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + + case Intrinsic::longjmp: { + ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(), + Type::getVoidTy(Context)); + break; + } + + case Intrinsic::siglongjmp: { + // Insert the call to abort + ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), + Type::getVoidTy(Context)); + break; + } + case Intrinsic::ctpop: + CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::bswap: + CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::ctlz: + CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::cttz: { + // cttz(x) -> ctpop(~X & (X-1)) + Value *Src = CI->getArgOperand(0); + Value *NotSrc = Builder.CreateNot(Src); + NotSrc->setName(Src->getName() + ".not"); + Value *SrcM1 = ConstantInt::get(Src->getType(), 1); + SrcM1 = Builder.CreateSub(Src, SrcM1); + Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI); + CI->replaceAllUsesWith(Src); + break; + } + + case Intrinsic::stacksave: + case Intrinsic::stackrestore: { + if (!Warned) + errs() << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? + "save" : "restore") << " intrinsic.\n"; + Warned = true; + if (Callee->getIntrinsicID() == Intrinsic::stacksave) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + } + + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + errs() << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + "return" : "frame") << "address intrinsic.\n"; + CI->replaceAllUsesWith(ConstantPointerNull::get( + cast<PointerType>(CI->getType()))); + break; + + case Intrinsic::prefetch: + break; // Simply strip out prefetches on unsupported architectures + + case Intrinsic::pcmarker: + break; // Simply strip out pcmarker on unsupported architectures + case Intrinsic::readcyclecounter: { + errs() << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); + break; + } + + case Intrinsic::dbg_declare: + break; // Simply strip out debugging intrinsics + + case Intrinsic::eh_typeid_for: + // Return something different to eh_selector. 
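+ // The constant 1 is arbitrary; it only needs to differ from whatever a
+ // lowered eh.selector produces (presumably 0) so that type-id comparisons
+ // in the landing-pad dispatch stay deterministic.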
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + + case Intrinsic::var_annotation: + break; // Strip out annotate intrinsic + + case Intrinsic::memcpy: { + Type *IntPtr = TD.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); + Ops[2] = Size; + ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::memmove: { + Type *IntPtr = TD.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); + Ops[2] = Size; + ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::memset: { + Type *IntPtr = TD.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + // Extend the amount to i32. + Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), + Type::getInt32Ty(Context), + /* isSigned */ false); + Ops[2] = Size; + ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::sqrt: { + ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl"); + break; + } + case Intrinsic::log: { + ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl"); + break; + } + case Intrinsic::log2: { + ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l"); + break; + } + case Intrinsic::log10: { + ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l"); + break; + } + case Intrinsic::exp: { + ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl"); + break; + } + case Intrinsic::exp2: { + ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l"); + break; + } + case Intrinsic::pow: { + ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl"); + break; + } + case Intrinsic::flt_rounds: + // Lower to "round to the nearest" + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + // Discard region information. + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + break; + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + // Discard region information. + break; + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); +} + +bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { + // Verify this is a simple bswap. + if (CI->getNumArgOperands() != 1 || + CI->getType() != CI->getArgOperand(0)->getType() || + !CI->getType()->isIntegerTy()) + return false; + + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty) + return false; + + // Okay, we can do this xform, do so now. 
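+ // E.g. a call to a declared i16 @bswap16(i16) (hypothetical name), where
+ // the operand and result types match, is rewritten into a call to
+ // @llvm.bswap.i16 on the same operand, and all uses are redirected to it.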
+ Module *M = CI->getParent()->getParent()->getParent(); + Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty); + + Value *Op = CI->getArgOperand(0); + Op = CallInst::Create(Int, Op, CI->getName(), CI); + + CI->replaceAllUsesWith(Op); + CI->eraseFromParent(); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp new file mode 100644 index 0000000..96a5389 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp @@ -0,0 +1,14 @@ +//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/JITCodeEmitter.h" + +using namespace llvm; + +void JITCodeEmitter::anchor() { } diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp new file mode 100644 index 0000000..1a09837 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -0,0 +1,296 @@ +//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVMTargetMachine class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +// Enable or disable FastISel. Both options are needed, because +// FastISel is enabled by default with -fast, and we wish to be +// able to enable or disable fast-isel independently from -O0. 
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
+
+static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
+ cl::desc("Show encoding in .s output"));
+static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
+ cl::desc("Show instruction structure in .s output"));
+
+static cl::opt<cl::boolOrDefault>
+AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
+ cl::init(cl::BOU_UNSET));
+
+static bool getVerboseAsm() {
+ switch (AsmVerbose) {
+ case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+ llvm_unreachable("Invalid verbose asm state");
+}
+
+LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, Triple, CPU, FS, Options) {
+ CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
+ AsmInfo = T.createMCAsmInfo(Triple);
+ // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
+ // and if the old one gets included then MCAsmInfo will be NULL and
+ // we'll crash later.
+ // Provide the user with a useful error message about what's wrong.
+ assert(AsmInfo && "MCAsmInfo not initialized. "
+ "Make sure you include the correct TargetSelect.h "
+ "and that InitializeAllTargetMCs() is being invoked!");
+}
+
+void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+}
+
+/// addPassesToX helper drives creation and initialization of TargetPassConfig.
+static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
+ PassManagerBase &PM,
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
+ // Targets may override createPassConfig to provide a target-specific
+ // subclass.
+ TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+ PassConfig->setStartStopPasses(StartAfter, StopAfter);
+
+ // Set PassConfig options provided by TargetMachine.
+ PassConfig->setDisableVerify(DisableVerify);
+
+ PM.add(PassConfig);
+
+ PassConfig->addIRPasses();
+
+ PassConfig->addCodeGenPrepare();
+
+ PassConfig->addPassesToHandleExceptions();
+
+ PassConfig->addISelPrepare();
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI =
+ new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(),
+ &TM->getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*TM));
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (TM->getOptLevel() == CodeGenOpt::None &&
+ EnableFastISelOption != cl::BOU_FALSE))
+ TM->setFastISel(true);
+
+ // Ask the target for an isel.
+ if (PassConfig->addInstSelector())
+ return NULL;
+
+ PassConfig->addMachinePasses();
+
+ PassConfig->setInitialized();
+
+ return Context;
+}
+
+bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
+ // Add common CodeGen passes.
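+ // A rough sketch of the flow: addPassesToGenerateCode builds the entire
+ // CodeGen pipeline and returns its MCContext on success; the code below
+ // then wires up an MCStreamer-based AsmPrinter to consume what the
+ // pipeline produces. Returning true anywhere signals failure.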
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
+ StartAfter, StopAfter);
+ if (!Context)
+ return true;
+
+ if (StopAfter) {
+ // FIXME: The intent is that this should eventually write out a YAML file,
+ // containing the LLVM IR, the machine-level IR (when stopping after a
+ // machine-level pass), and whatever other information is needed to
+ // deserialize the code and resume compilation. For now, just write the
+ // LLVM IR.
+ PM.add(createPrintModulePass(&Out));
+ return false;
+ }
+
+ if (hasMCSaveTempLabels())
+ Context->setAllowTemporaryLabels(false);
+
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCRegisterInfo &MRI = *getRegisterInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI,
+ *getInstrInfo(),
+ Context->getRegisterInfo(), STI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = 0;
+ MCAsmBackend *MAB = 0;
+ if (ShowMCEncoding) {
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI,
+ *Context);
+ MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
+ }
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ getVerboseAsm(),
+ hasMCUseLoc(),
+ hasMCUseCFI(),
+ hasMCUseDwarfDirectory(),
+ InstPrinter,
+ MCE, MAB,
+ ShowMCInst);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+ STI, *Context);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(),
+ TargetCPU);
+ if (MCE == 0 || MAB == 0)
+ return true;
+
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(),
+ *Context, *MAB, Out,
+ MCE, hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->setAutoInitSections(true);
+ break;
+ }
+ case CGFT_Null:
+ // The Null output is intended for performance analysis and testing, not
+ // real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ break;
+ }
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ return false;
+}
+
+/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+/// get machine code emitted. This uses a JITCodeEmitter object to handle
+/// actually outputting the machine code and resolving things like the address
+/// of functions. This method should return true if machine code emission is
+/// not supported.
+///
+bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
+ JITCodeEmitter &JCE,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
+ if (!Context)
+ return true;
+
+ addCodeEmitter(PM, JCE);
+
+ return false; // success!
+}
+
+/// addPassesToEmitMC - Add passes to the specified pass manager to get
+/// machine code emitted with the MCJIT. This method returns true if machine
+/// code is not supported.
+/// It fills the MCContext Ctx pointer, which can be used to build a custom
+/// MCStreamer.
+///
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
+                                          MCContext *&Ctx,
+                                          raw_ostream &Out,
+                                          bool DisableVerify) {
+  // Add common CodeGen passes.
+  Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
+  if (!Ctx)
+    return true;
+
+  if (hasMCSaveTempLabels())
+    Ctx->setAllowTemporaryLabels(false);
+
+  // Create the code emitter for the target if it exists. If not, .o file
+  // emission fails.
+  const MCRegisterInfo &MRI = *getRegisterInfo();
+  const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+  MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+                                                       STI, *Ctx);
+  MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(),
+                                                     TargetCPU);
+  if (MCE == 0 || MAB == 0)
+    return true;
+
+  OwningPtr<MCStreamer> AsmStreamer;
+  AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), *Ctx,
+                                                       *MAB, Out, MCE,
+                                                       hasMCRelaxAll(),
+                                                       hasMCNoExecStack()));
+  AsmStreamer.get()->InitSections();
+
+  // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+  FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+  if (Printer == 0)
+    return true;
+
+  // If successful, createAsmPrinter took ownership of AsmStreamer.
+  AsmStreamer.take();
+
+  PM.add(Printer);
+
+  return false; // success!
+}
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
new file mode 100644
index 0000000..deab05a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -0,0 +1,152 @@
+//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+  // The isScheduleHigh flag allows nodes with wraparound dependencies that
+  // cannot easily be modeled as edges with latencies to be scheduled as
+  // soon as possible in a top-down schedule.
+  if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+    return false;
+  if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+    return true;
+
+  unsigned LHSNum = LHS->NodeNum;
+  unsigned RHSNum = RHS->NodeNum;
+
+  // The most important heuristic is scheduling the critical path.
+  unsigned LHSLatency = PQ->getLatency(LHSNum);
+  unsigned RHSLatency = PQ->getLatency(RHSNum);
+  if (LHSLatency < RHSLatency) return true;
+  if (LHSLatency > RHSLatency) return false;
+
+  // After that, if two nodes have identical latencies, look to see if one will
+  // unblock more nodes than the other.
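+  // Worked example (illustrative numbers, not from any particular DAG): with
+  // equal latencies, if LHS is the sole unscheduled predecessor of one node
+  // while RHS solely blocks three, then LHSBlocked(1) < RHSBlocked(3) returns
+  // true, ranking RHS ahead so that scheduling it unblocks more of the DAG
+  // sooner.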
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return RHSNum < LHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + + return OnlyAvailablePred; +} + +void LatencyPriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + } + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + + Queue.push_back(SU); +} + + +// scheduledNode - As nodes are scheduled, we look to see if there are any +// successor nodes that have a single unscheduled predecessor. If so, that +// single predecessor has a higher priority, since scheduling it will make +// the node available. +void LatencyPriorityQueue::scheduledNode(SUnit *SU) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + AdjustPriorityOfUnscheduledPreds(I->getSUnit()); + } +} + +/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. + + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. 
+  push(OnlyAvailablePred);
+}
+
+SUnit *LatencyPriorityQueue::pop() {
+  if (empty()) return NULL;
+  std::vector<SUnit *>::iterator Best = Queue.begin();
+  for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+       E = Queue.end(); I != E; ++I)
+    if (Picker(*Best, *I))
+      Best = I;
+  SUnit *V = *Best;
+  if (Best != prior(Queue.end()))
+    std::swap(*Best, Queue.back());
+  Queue.pop_back();
+  return V;
+}
+
+void LatencyPriorityQueue::remove(SUnit *SU) {
+  assert(!Queue.empty() && "Queue is empty!");
+  std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+  if (I != prior(Queue.end()))
+    std::swap(*I, Queue.back());
+  Queue.pop_back();
+}
+
+#ifdef NDEBUG
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+  LatencyPriorityQueue q = *this;
+  while (!q.empty()) {
+    SUnit *su = q.pop();
+    dbgs() << "Height " << su->getHeight() << ": ";
+    su->dump(DAG);
+  }
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
new file mode 100644
index 0000000..8172154
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -0,0 +1,335 @@
+//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LexicalScopes analysis.
+//
+// This pass collects lexical scope information and maps machine instructions
+// to respective lexical scopes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lexicalscopes"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+LexicalScopes::~LexicalScopes() {
+  releaseMemory();
+}
+
+/// releaseMemory - release memory.
+void LexicalScopes::releaseMemory() {
+  MF = NULL;
+  CurrentFnLexicalScope = NULL;
+  DeleteContainerSeconds(LexicalScopeMap);
+  DeleteContainerSeconds(AbstractScopeMap);
+  InlinedLexicalScopeMap.clear();
+  AbstractScopesList.clear();
+}
+
+/// initialize - Scan machine function and construct lexical scope nest.
+void LexicalScopes::initialize(const MachineFunction &Fn) {
+  releaseMemory();
+  MF = &Fn;
+  SmallVector<InsnRange, 4> MIRanges;
+  DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap;
+  extractLexicalScopes(MIRanges, MI2ScopeMap);
+  if (CurrentFnLexicalScope) {
+    constructScopeNest(CurrentFnLexicalScope);
+    assignInstructionRanges(MIRanges, MI2ScopeMap);
+  }
+}
+
+/// extractLexicalScopes - Extract instruction ranges for each lexical scope
+/// in the given machine function.
+void LexicalScopes::
+extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges,
+                  DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+
+  // Scan each instruction and create scopes. First build the working set of
+  // scopes.
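+  // Illustrative walk-through (hypothetical instructions i1..i4): i1-i3 at
+  // scope A followed by i4 at scope B yield the ranges (i1,i3)->A and
+  // (i4,i4)->B. An instruction with an unknown location is absorbed into the
+  // current range via PrevMI, while a DBG_VALUE at a new location is skipped
+  // without ending the range.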
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + const MachineInstr *RangeBeginMI = NULL; + const MachineInstr *PrevMI = NULL; + DebugLoc PrevDL; + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MInsn = II; + + // Check if instruction has valid location information. + const DebugLoc MIDL = MInsn->getDebugLoc(); + if (MIDL.isUnknown()) { + PrevMI = MInsn; + continue; + } + + // If scope has not changed then skip this instruction. + if (MIDL == PrevDL) { + PrevMI = MInsn; + continue; + } + + // Ignore DBG_VALUE. It does not contribute to any instruction in output. + if (MInsn->isDebugValue()) + continue; + + if (RangeBeginMI) { + // If we have already seen a beginning of an instruction range and + // current instruction scope does not match scope of first instruction + // in this range then create a new instruction range. + InsnRange R(RangeBeginMI, PrevMI); + MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL); + MIRanges.push_back(R); + } + + // This is a beginning of a new instruction range. + RangeBeginMI = MInsn; + + // Reset previous markers. + PrevMI = MInsn; + PrevDL = MIDL; + } + + // Create last instruction range. + if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) { + InsnRange R(RangeBeginMI, PrevMI); + MIRanges.push_back(R); + MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL); + } + } +} + +/// findLexicalScope - Find lexical scope, either regular or inlined, for the +/// given DebugLoc. Return NULL if not found. +LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { + MDNode *Scope = NULL; + MDNode *IA = NULL; + DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); + if (!Scope) return NULL; + + // The scope that we were created with could have an extra file - which + // isn't what we care about in this case. + DIDescriptor D = DIDescriptor(Scope); + if (D.isLexicalBlockFile()) + Scope = DILexicalBlockFile(Scope).getScope(); + + if (IA) + return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA)); + return LexicalScopeMap.lookup(Scope); +} + +/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If +/// not available then create new lexical scope. +LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { + MDNode *Scope = NULL; + MDNode *InlinedAt = NULL; + DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); + + if (InlinedAt) { + // Create an abstract scope for inlined function. + getOrCreateAbstractScope(Scope); + // Create an inlined scope for inlined function. + return getOrCreateInlinedScope(Scope, InlinedAt); + } + + return getOrCreateRegularScope(Scope); +} + +/// getOrCreateRegularScope - Find or create a regular lexical scope. 
+LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
+  DIDescriptor D = DIDescriptor(Scope);
+  if (D.isLexicalBlockFile()) {
+    Scope = DILexicalBlockFile(Scope).getScope();
+    D = DIDescriptor(Scope);
+  }
+
+  LexicalScope *WScope = LexicalScopeMap.lookup(Scope);
+  if (WScope)
+    return WScope;
+
+  LexicalScope *Parent = NULL;
+  if (D.isLexicalBlock())
+    Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope));
+  WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false);
+  LexicalScopeMap.insert(std::make_pair(Scope, WScope));
+  if (!Parent && DIDescriptor(Scope).isSubprogram()
+      && DISubprogram(Scope).describes(MF->getFunction()))
+    CurrentFnLexicalScope = WScope;
+
+  return WScope;
+}
+
+/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
+LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope,
+                                                     MDNode *InlinedAt) {
+  LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt);
+  if (InlinedScope)
+    return InlinedScope;
+
+  DebugLoc InlinedLoc = DebugLoc::getFromDILocation(InlinedAt);
+  InlinedScope = new LexicalScope(getOrCreateLexicalScope(InlinedLoc),
+                                  DIDescriptor(Scope), InlinedAt, false);
+  InlinedLexicalScopeMap[InlinedLoc] = InlinedScope;
+  LexicalScopeMap[InlinedAt] = InlinedScope;
+  return InlinedScope;
+}
+
+/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
+LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) {
+  assert(N && "Invalid Scope encoding!");
+
+  DIDescriptor Scope(N);
+  if (Scope.isLexicalBlockFile())
+    Scope = DILexicalBlockFile(Scope).getScope();
+  LexicalScope *AScope = AbstractScopeMap.lookup(N);
+  if (AScope)
+    return AScope;
+
+  LexicalScope *Parent = NULL;
+  if (Scope.isLexicalBlock()) {
+    DILexicalBlock DB(N);
+    DIDescriptor ParentDesc = DB.getContext();
+    Parent = getOrCreateAbstractScope(ParentDesc);
+  }
+  AScope = new LexicalScope(Parent, DIDescriptor(N), NULL, true);
+  AbstractScopeMap[N] = AScope;
+  if (DIDescriptor(N).isSubprogram())
+    AbstractScopesList.push_back(AScope);
+  return AScope;
+}
+
+/// constructScopeNest - Assign DFS-in and DFS-out numbers to every scope in
+/// the scope tree rooted at Scope, using an explicit work stack.
+void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
+  assert(Scope && "Unable to calculate scope dominance graph!");
+  SmallVector<LexicalScope *, 4> WorkStack;
+  WorkStack.push_back(Scope);
+  unsigned Counter = 0;
+  while (!WorkStack.empty()) {
+    LexicalScope *WS = WorkStack.back();
+    const SmallVector<LexicalScope *, 4> &Children = WS->getChildren();
+    bool visitedChildren = false;
+    for (SmallVector<LexicalScope *, 4>::const_iterator SI = Children.begin(),
+         SE = Children.end(); SI != SE; ++SI) {
+      LexicalScope *ChildScope = *SI;
+      if (!ChildScope->getDFSOut()) {
+        WorkStack.push_back(ChildScope);
+        visitedChildren = true;
+        ChildScope->setDFSIn(++Counter);
+        break;
+      }
+    }
+    if (!visitedChildren) {
+      WorkStack.pop_back();
+      WS->setDFSOut(++Counter);
+    }
+  }
+}
+
+/// assignInstructionRanges - Find ranges of instructions covered by each
+/// lexical scope.
+void LexicalScopes:: +assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges, + DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) +{ + + LexicalScope *PrevLexicalScope = NULL; + for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(), + RE = MIRanges.end(); RI != RE; ++RI) { + const InsnRange &R = *RI; + LexicalScope *S = MI2ScopeMap.lookup(R.first); + assert (S && "Lost LexicalScope for a machine instruction!"); + if (PrevLexicalScope && !PrevLexicalScope->dominates(S)) + PrevLexicalScope->closeInsnRange(S); + S->openInsnRange(R.first); + S->extendInsnRange(R.second); + PrevLexicalScope = S; + } + + if (PrevLexicalScope) + PrevLexicalScope->closeInsnRange(); +} + +/// getMachineBasicBlocks - Populate given set using machine basic blocks which +/// have machine instructions that belong to lexical scope identified by +/// DebugLoc. +void LexicalScopes:: +getMachineBasicBlocks(DebugLoc DL, + SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) { + MBBs.clear(); + LexicalScope *Scope = getOrCreateLexicalScope(DL); + if (!Scope) + return; + + if (Scope == CurrentFnLexicalScope) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) + MBBs.insert(I); + return; + } + + SmallVector<InsnRange, 4> &InsnRanges = Scope->getRanges(); + for (SmallVector<InsnRange, 4>::iterator I = InsnRanges.begin(), + E = InsnRanges.end(); I != E; ++I) { + InsnRange &R = *I; + MBBs.insert(R.first->getParent()); + } +} + +/// dominates - Return true if DebugLoc's lexical scope dominates at least one +/// machine instruction's lexical scope in a given machine basic block. +bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { + LexicalScope *Scope = getOrCreateLexicalScope(DL); + if (!Scope) + return false; + + // Current function scope covers all basic blocks in the function. + if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF) + return true; + + bool Result = false; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + DebugLoc IDL = I->getDebugLoc(); + if (IDL.isUnknown()) + continue; + if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) + if (Scope->dominates(IScope)) + return true; + } + return Result; +} + +void LexicalScope::anchor() { } + +/// dump - Print data structures. +void LexicalScope::dump(unsigned Indent) const { +#ifndef NDEBUG + raw_ostream &err = dbgs(); + err.indent(Indent); + err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n"; + const MDNode *N = Desc; + err.indent(Indent); + N->dump(); + if (AbstractScope) + err << std::string(Indent, ' ') << "Abstract Scope\n"; + + if (!Children.empty()) + err << std::string(Indent + 2, ' ') << "Children ...\n"; + for (unsigned i = 0, e = Children.size(); i != e; ++i) + if (Children[i] != this) + Children[i]->dump(Indent + 2); +#endif +} + diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp new file mode 100644 index 0000000..0b117ac --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -0,0 +1,995 @@ +//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveDebugVariables analysis. 
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept - in
+// a virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when
+// values are moved between registers and stack slots. Finally, emit new
+// DBG_VALUE instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livedebug"
+#include "LiveDebugVariables.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+          cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+                "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+                "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequiredTransitive<LiveIntervals>();
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+  initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+/// LocMap - Map of where a user value is live, and its location.
+typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+
+namespace {
+/// UserValueScopes - Keeps track of lexical scopes associated with a
+/// user value's source location.
+class UserValueScopes {
+  DebugLoc DL;
+  LexicalScopes &LS;
+  SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
+
+public:
+  UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(D), LS(L) {}
+
+  /// dominates - Return true if current scope dominates at least one machine
+  /// instruction in a given machine basic block.
+  bool dominates(MachineBasicBlock *MBB) {
+    if (LBlocks.empty())
+      LS.getMachineBasicBlocks(DL, LBlocks);
+    if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB))
+      return true;
+    return false;
+  }
+};
+} // end anonymous namespace
+
+/// UserValue - A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register.
+/// The equivalence class is the transitive closure of that relation.
+namespace {
+class LDVImpl;
+class UserValue {
+  const MDNode *variable; ///< The debug info variable we are part of.
+  unsigned offset;        ///< Byte offset into variable.
+  DebugLoc dl;            ///< The debug location for the variable. This is
+                          ///< used by dwarf writer to find lexical scope.
+  UserValue *leader;      ///< Equivalence class leader.
+  UserValue *next;        ///< Next value in equivalence class, or null.
+
+  /// Numbered locations referenced by locmap.
+  SmallVector<MachineOperand, 4> locations;
+
+  /// Map of slot indices where this value is live.
+  LocMap locInts;
+
+  /// coalesceLocation - After LocNo was changed, check if it has become
+  /// identical to another location, and coalesce them. This may cause LocNo or
+  /// a later location to be erased, but no earlier location will be erased.
+  void coalesceLocation(unsigned LocNo);
+
+  /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+  void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
+                        LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+  /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
+  /// is live. Returns true if any changes were made.
+  bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
+
+public:
+  /// UserValue - Create a new UserValue.
+  UserValue(const MDNode *var, unsigned o, DebugLoc L,
+            LocMap::Allocator &alloc)
+    : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+  {}
+
+  /// getLeader - Get the leader of this value's equivalence class.
+  UserValue *getLeader() {
+    UserValue *l = leader;
+    while (l != l->leader)
+      l = l->leader;
+    return leader = l;
+  }
+
+  /// getNext - Return the next UserValue in the equivalence class.
+  UserValue *getNext() const { return next; }
+
+  /// match - Does this UserValue match the parameters?
+  bool match(const MDNode *Var, unsigned Offset) const {
+    return Var == variable && Offset == offset;
+  }
+
+  /// merge - Merge equivalence classes.
+  static UserValue *merge(UserValue *L1, UserValue *L2) {
+    L2 = L2->getLeader();
+    if (!L1)
+      return L2;
+    L1 = L1->getLeader();
+    if (L1 == L2)
+      return L1;
+    // Splice L2 before L1's members.
+    UserValue *End = L2;
+    while (End->next)
+      End->leader = L1, End = End->next;
+    End->leader = L1;
+    End->next = L1->next;
+    L1->next = L2;
+    return L1;
+  }
+
+  /// getLocationNo - Return the location number that matches LocMO.
+  unsigned getLocationNo(const MachineOperand &LocMO) {
+    if (LocMO.isReg()) {
+      if (LocMO.getReg() == 0)
+        return ~0u;
+      // For register locations we don't care about use/def and other flags.
+      for (unsigned i = 0, e = locations.size(); i != e; ++i)
+        if (locations[i].isReg() &&
+            locations[i].getReg() == LocMO.getReg() &&
+            locations[i].getSubReg() == LocMO.getSubReg())
+          return i;
+    } else
+      for (unsigned i = 0, e = locations.size(); i != e; ++i)
+        if (LocMO.isIdenticalTo(locations[i]))
+          return i;
+    locations.push_back(LocMO);
+    // We are storing a MachineOperand outside a MachineInstr.
+    locations.back().clearParent();
+    // Don't store def operands.
+    if (locations.back().isReg())
+      locations.back().setIsUse();
+    return locations.size() - 1;
+  }
+
+  /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+  void mapVirtRegs(LDVImpl *LDV);
+
+  /// addDef - Add a definition point to this value.
+  void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+    // Add a singular (Idx,Idx) -> Loc mapping.
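+    // Explanatory note: locInts is an IntervalMap keyed by SlotIndex, so the
+    // def is recorded as the half-open one-slot interval
+    // [Idx, Idx.getNextSlot()) mapped to the location number; extendDef()
+    // later grows this placeholder to cover the range where it stays live.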
+    LocMap::iterator I = locInts.find(Idx);
+    if (!I.valid() || I.start() != Idx)
+      I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+    else
+      // A later DBG_VALUE at the same SlotIndex overrides the old location.
+      I.setValue(getLocationNo(LocMO));
+  }
+
+  /// extendDef - Extend the current definition as far as possible down the
+  /// dominator tree. Stop when meeting an existing def or when leaving the
+  /// live range of VNI.
+  /// End points where VNI is no longer live are added to Kills.
+  /// @param Idx   Starting point for the definition.
+  /// @param LocNo Location number to propagate.
+  /// @param LI    Restrict liveness to where LI has the value VNI. May be
+  ///              null.
+  /// @param VNI   When LI is not null, this is the value to restrict to.
+  /// @param Kills Append end points of VNI's live range to Kills.
+  /// @param LIS   Live intervals analysis.
+  /// @param MDT   Dominator tree.
+  void extendDef(SlotIndex Idx, unsigned LocNo,
+                 LiveInterval *LI, const VNInfo *VNI,
+                 SmallVectorImpl<SlotIndex> *Kills,
+                 LiveIntervals &LIS, MachineDominatorTree &MDT,
+                 UserValueScopes &UVS);
+
+  /// addDefsFromCopies - The value in LI/LocNo may be copied to other
+  /// registers. Determine if any of the copies are available at the kill
+  /// points, and add defs if possible.
+  /// @param LI      Scan for copies of the value in LI->reg.
+  /// @param LocNo   Location number of LI->reg.
+  /// @param Kills   Points where the range of LocNo could be extended.
+  /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+  void addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+                      const SmallVectorImpl<SlotIndex> &Kills,
+                      SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+                      MachineRegisterInfo &MRI,
+                      LiveIntervals &LIS);
+
+  /// computeIntervals - Compute the live intervals of all locations after
+  /// collecting all their def points.
+  void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+                        LiveIntervals &LIS, MachineDominatorTree &MDT,
+                        UserValueScopes &UVS);
+
+  /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs
+  /// is live. Returns true if any changes were made.
+  bool splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+  /// rewriteLocations - Rewrite virtual register locations according to the
+  /// provided virtual register map.
+  void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+  void emitDebugValues(VirtRegMap *VRM,
+                       LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+  /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A
+  /// variable may have more than one corresponding DBG_VALUE instruction.
+  /// Only the first one needs a DebugLoc to identify the variable's lexical
+  /// scope in the source file.
+  DebugLoc findDebugLoc();
+
+  /// getDebugLoc - Return DebugLoc of this UserValue.
+  DebugLoc getDebugLoc() { return dl; }
+  void print(raw_ostream&, const TargetMachine*);
+};
+} // namespace
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+  LiveDebugVariables &pass;
+  LocMap::Allocator allocator;
+  MachineFunction *MF;
+  LiveIntervals *LIS;
+  LexicalScopes LS;
+  MachineDominatorTree *MDT;
+  const TargetRegisterInfo *TRI;
+
+  /// Whether emitDebugValues has been called.
+  bool EmitDone;
+  /// Whether the machine function was modified during the pass.
+  bool ModifiedMF;
+
+  /// userValues - All allocated UserValue instances.
+  SmallVector<UserValue*, 8> userValues;
+
+  /// Map virtual register to eq class leader.
+  typedef DenseMap<unsigned, UserValue*> VRMap;
+  VRMap virtRegToEqClass;
+
+  /// Map user variable to eq class leader.
+  typedef DenseMap<const MDNode *, UserValue*> UVMap;
+  UVMap userVarMap;
+
+  /// getUserValue - Find or create a UserValue.
+  UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+  /// lookupVirtReg - Find the EC leader for VirtReg or null.
+  UserValue *lookupVirtReg(unsigned VirtReg);
+
+  /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+  /// @param MI  DBG_VALUE instruction
+  /// @param Idx Last valid SlotIndex before instruction.
+  /// @return    True if the DBG_VALUE instruction should be deleted.
+  bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+  /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+  /// a UserValue def for each instruction.
+  /// @param mf MachineFunction to be scanned.
+  /// @return   True if any debug values were found.
+  bool collectDebugValues(MachineFunction &mf);
+
+  /// computeIntervals - Compute the live intervals of all user values after
+  /// collecting all their def points.
+  void computeIntervals();
+
+public:
+  LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false),
+                                    ModifiedMF(false) {}
+  bool runOnMachineFunction(MachineFunction &mf);
+
+  /// clear - Release all memory.
+  void clear() {
+    DeleteContainerPointers(userValues);
+    userValues.clear();
+    virtRegToEqClass.clear();
+    userVarMap.clear();
+    // Make sure we call emitDebugValues if the machine function was modified.
+    assert((!ModifiedMF || EmitDone) &&
+           "Dbg values are not emitted in LDV");
+    EmitDone = false;
+    ModifiedMF = false;
+  }
+
+  /// mapVirtReg - Map virtual register to an equivalence class.
+  void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+  /// splitRegister - Replace all references to OldReg with NewRegs.
+  void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+  void emitDebugValues(VirtRegMap *VRM);
+
+  void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
+  DIVariable DV(variable);
+  OS << "!\"";
+  DV.printExtendedName(OS);
+  OS << "\"\t";
+  if (offset)
+    OS << '+' << offset;
+  for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+    OS << " [" << I.start() << ';' << I.stop() << "):";
+    if (I.value() == ~0u)
+      OS << "undef";
+    else
+      OS << I.value();
+  }
+  for (unsigned i = 0, e = locations.size(); i != e; ++i) {
+    OS << " Loc" << i << '=';
+    locations[i].print(OS, TM);
+  }
+  OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+  OS << "********** DEBUG VARIABLES **********\n";
+  for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+    userValues[i]->print(OS, &MF->getTarget());
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+  unsigned KeepLoc = 0;
+  for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+    if (KeepLoc == LocNo)
+      continue;
+    if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+      break;
+  }
+  // No matches.
+  if (KeepLoc == locations.size())
+    return;
+
+  // Keep the smaller location, erase the larger one.
+  unsigned EraseLoc = LocNo;
+  if (KeepLoc > EraseLoc)
+    std::swap(KeepLoc, EraseLoc);
+  locations.erase(locations.begin() + EraseLoc);
+
+  // Rewrite values.
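+  // Worked example (hypothetical): with locations {L0, L1, L2, L3} and
+  // EraseLoc == 1, map entries holding 1 become KeepLoc, and entries holding
+  // 2 or 3 shift down to 1 or 2 so they keep naming the same operands.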
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { + unsigned v = I.value(); + if (v == EraseLoc) + I.setValue(KeepLoc); // Coalesce when possible. + else if (v > EraseLoc) + I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values. + } +} + +void UserValue::mapVirtRegs(LDVImpl *LDV) { + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (locations[i].isReg() && + TargetRegisterInfo::isVirtualRegister(locations[i].getReg())) + LDV->mapVirtReg(locations[i].getReg(), this); +} + +UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, + DebugLoc DL) { + UserValue *&Leader = userVarMap[Var]; + if (Leader) { + UserValue *UV = Leader->getLeader(); + Leader = UV; + for (; UV; UV = UV->getNext()) + if (UV->match(Var, Offset)) + return UV; + } + + UserValue *UV = new UserValue(Var, Offset, DL, allocator); + userValues.push_back(UV); + Leader = UserValue::merge(Leader, UV); + return UV; +} + +void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs"); + UserValue *&Leader = virtRegToEqClass[VirtReg]; + Leader = UserValue::merge(Leader, EC); +} + +UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { + if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) + return UV->getLeader(); + return 0; +} + +bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { + // DBG_VALUE loc, offset, variable + if (MI->getNumOperands() != 3 || + !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) { + DEBUG(dbgs() << "Can't handle " << *MI); + return false; + } + + // Get or create the UserValue for (variable,offset). + unsigned Offset = MI->getOperand(1).getImm(); + const MDNode *Var = MI->getOperand(2).getMetadata(); + UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); + UV->addDef(Idx, MI->getOperand(0)); + return true; +} + +bool LDVImpl::collectDebugValues(MachineFunction &mf) { + bool Changed = false; + for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE; + ++MFI) { + MachineBasicBlock *MBB = MFI; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + MBBI != MBBE;) { + if (!MBBI->isDebugValue()) { + ++MBBI; + continue; + } + // DBG_VALUE has no slot index, use the previous instruction instead. + SlotIndex Idx = MBBI == MBB->begin() ? + LIS->getMBBStartIdx(MBB) : + LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot(); + // Handle consecutive DBG_VALUE instructions with the same slot index. + do { + if (handleDebugValue(MBBI, Idx)) { + MBBI = MBB->erase(MBBI); + Changed = true; + } else + ++MBBI; + } while (MBBI != MBBE && MBBI->isDebugValue()); + } + } + return Changed; +} + +void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, + LiveInterval *LI, const VNInfo *VNI, + SmallVectorImpl<SlotIndex> *Kills, + LiveIntervals &LIS, MachineDominatorTree &MDT, + UserValueScopes &UVS) { + SmallVector<SlotIndex, 16> Todo; + Todo.push_back(Idx); + do { + SlotIndex Start = Todo.pop_back_val(); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); + SlotIndex Stop = LIS.getMBBEndIdx(MBB); + LocMap::iterator I = locInts.find(Start); + + // Limit to VNI's live range. + bool ToEnd = true; + if (LI && VNI) { + LiveRange *Range = LI->getLiveRangeContaining(Start); + if (!Range || Range->valno != VNI) { + if (Kills) + Kills->push_back(Start); + continue; + } + if (Range->end < Stop) + Stop = Range->end, ToEnd = false; + } + + // There could already be a short def at Start. 
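+    // (Explanatory note: a short def here is either our own one-slot
+    // [Idx, Idx.getNextSlot()) placeholder from addDef(), which is skipped
+    // over, or a different location that blocks the extension.)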
+ if (I.valid() && I.start() <= Start) { + // Stop when meeting a different location or an already extended interval. + Start = Start.getNextSlot(); + if (I.value() != LocNo || I.stop() != Start) + continue; + // This is a one-slot placeholder. Just skip it. + ++I; + } + + // Limited by the next def. + if (I.valid() && I.start() < Stop) + Stop = I.start(), ToEnd = false; + // Limited by VNI's live range. + else if (!ToEnd && Kills) + Kills->push_back(Stop); + + if (Start >= Stop) + continue; + + I.insert(Start, Stop, LocNo); + + // If we extended to the MBB end, propagate down the dominator tree. + if (!ToEnd) + continue; + const std::vector<MachineDomTreeNode*> &Children = + MDT.getNode(MBB)->getChildren(); + for (unsigned i = 0, e = Children.size(); i != e; ++i) { + MachineBasicBlock *MBB = Children[i]->getBlock(); + if (UVS.dominates(MBB)) + Todo.push_back(LIS.getMBBStartIdx(MBB)); + } + } while (!Todo.empty()); +} + +void +UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, + const SmallVectorImpl<SlotIndex> &Kills, + SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs, + MachineRegisterInfo &MRI, LiveIntervals &LIS) { + if (Kills.empty()) + return; + // Don't track copies from physregs, there are too many uses. + if (!TargetRegisterInfo::isVirtualRegister(LI->reg)) + return; + + // Collect all the (vreg, valno) pairs that are copies of LI. + SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(LI->reg), + UE = MRI.use_nodbg_end(); UI != UE; ++UI) { + // Copies of the full value. + if (UI.getOperand().getSubReg() || !UI->isCopy()) + continue; + MachineInstr *MI = &*UI; + unsigned DstReg = MI->getOperand(0).getReg(); + + // Don't follow copies to physregs. These are usually setting up call + // arguments, and the argument registers are always call clobbered. We are + // better off in the source register which could be a callee-saved register, + // or it could be spilled. + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + continue; + + // Is LocNo extended to reach this copy? If not, another def may be blocking + // it, or we are looking at a wrong value of LI. + SlotIndex Idx = LIS.getInstructionIndex(MI); + LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); + if (!I.valid() || I.value() != LocNo) + continue; + + if (!LIS.hasInterval(DstReg)) + continue; + LiveInterval *DstLI = &LIS.getInterval(DstReg); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot()); + assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value"); + CopyValues.push_back(std::make_pair(DstLI, DstVNI)); + } + + if (CopyValues.empty()) + return; + + DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n'); + + // Try to add defs of the copied values for each kill point. 
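+  // Example scenario (illustrative): the variable lives in a vreg that is
+  // killed at Idx, but an earlier COPY moved the value into another vreg. If
+  // that copy's value number still covers Idx, a def of the copy's location
+  // is inserted there so the variable remains visible past the kill.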
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + SlotIndex Idx = Kills[i]; + for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) { + LiveInterval *DstLI = CopyValues[j].first; + const VNInfo *DstVNI = CopyValues[j].second; + if (DstLI->getVNInfoAt(Idx) != DstVNI) + continue; + // Check that there isn't already a def at Idx + LocMap::iterator I = locInts.find(Idx); + if (I.valid() && I.start() <= Idx) + continue; + DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" + << DstVNI->id << " in " << *DstLI << '\n'); + MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); + assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); + unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); + I.insert(Idx, Idx.getNextSlot(), LocNo); + NewDefs.push_back(std::make_pair(Idx, LocNo)); + break; + } + } +} + +void +UserValue::computeIntervals(MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + LiveIntervals &LIS, + MachineDominatorTree &MDT, + UserValueScopes &UVS) { + SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; + + // Collect all defs to be extended (Skipping undefs). + for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) + if (I.value() != ~0u) + Defs.push_back(std::make_pair(I.start(), I.value())); + + // Extend all defs, and possibly add new ones along the way. + for (unsigned i = 0; i != Defs.size(); ++i) { + SlotIndex Idx = Defs[i].first; + unsigned LocNo = Defs[i].second; + const MachineOperand &Loc = locations[LocNo]; + + if (!Loc.isReg()) { + extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS); + continue; + } + + // Register locations are constrained to where the register value is live. + if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { + LiveInterval *LI = 0; + const VNInfo *VNI = 0; + if (LIS.hasInterval(Loc.getReg())) { + LI = &LIS.getInterval(Loc.getReg()); + VNI = LI->getVNInfoAt(Idx); + } + SmallVector<SlotIndex, 16> Kills; + extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS); + if (LI) + addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); + continue; + } + + // For physregs, use the live range of the first regunit as a guide. + unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI); + LiveInterval *LI = &LIS.getRegUnit(Unit); + const VNInfo *VNI = LI->getVNInfoAt(Idx); + // Don't track copies from physregs, it is too expensive. + extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS); + } + + // Finally, erase all the undefs. 
+ for (LocMap::iterator I = locInts.begin(); I.valid();) + if (I.value() == ~0u) + I.erase(); + else + ++I; +} + +void LDVImpl::computeIntervals() { + for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + UserValueScopes UVS(userValues[i]->getDebugLoc(), LS); + userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS); + userValues[i]->mapVirtRegs(this); + } +} + +bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + LIS = &pass.getAnalysis<LiveIntervals>(); + MDT = &pass.getAnalysis<MachineDominatorTree>(); + TRI = mf.getTarget().getRegisterInfo(); + clear(); + LS.initialize(mf); + DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " + << mf.getName() << " **********\n"); + + bool Changed = collectDebugValues(mf); + computeIntervals(); + DEBUG(print(dbgs())); + LS.releaseMemory(); + ModifiedMF = Changed; + return Changed; +} + +bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { + if (!EnableLDV) + return false; + if (!pImpl) + pImpl = new LDVImpl(this); + return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf); +} + +void LiveDebugVariables::releaseMemory() { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->clear(); +} + +LiveDebugVariables::~LiveDebugVariables() { + if (pImpl) + delete static_cast<LDVImpl*>(pImpl); +} + +//===----------------------------------------------------------------------===// +// Live Range Splitting +//===----------------------------------------------------------------------===// + +bool +UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { + DEBUG({ + dbgs() << "Splitting Loc" << OldLocNo << '\t'; + print(dbgs(), 0); + }); + bool DidChange = false; + LocMap::iterator LocMapI; + LocMapI.setMap(locInts); + for (unsigned i = 0; i != NewRegs.size(); ++i) { + LiveInterval *LI = NewRegs[i]; + if (LI->empty()) + continue; + + // Don't allocate the new LocNo until it is needed. + unsigned NewLocNo = ~0u; + + // Iterate over the overlaps between locInts and LI. + LocMapI.find(LI->beginIndex()); + if (!LocMapI.valid()) + continue; + LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start()); + LiveInterval::iterator LIE = LI->end(); + while (LocMapI.valid() && LII != LIE) { + // At this point, we know that LocMapI.stop() > LII->start. + LII = LI->advanceTo(LII, LocMapI.start()); + if (LII == LIE) + break; + + // Now LII->end > LocMapI.start(). Do we have an overlap? + if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) { + // Overlapping correct location. Allocate NewLocNo now. + if (NewLocNo == ~0u) { + MachineOperand MO = MachineOperand::CreateReg(LI->reg, false); + MO.setSubReg(locations[OldLocNo].getSubReg()); + NewLocNo = getLocationNo(MO); + DidChange = true; + } + + SlotIndex LStart = LocMapI.start(); + SlotIndex LStop = LocMapI.stop(); + + // Trim LocMapI down to the LII overlap. + if (LStart < LII->start) + LocMapI.setStartUnchecked(LII->start); + if (LStop > LII->end) + LocMapI.setStopUnchecked(LII->end); + + // Change the value in the overlap. This may trigger coalescing. + LocMapI.setValue(NewLocNo); + + // Re-insert any removed OldLocNo ranges. + if (LStart < LocMapI.start()) { + LocMapI.insert(LStart, LocMapI.start(), OldLocNo); + ++LocMapI; + assert(LocMapI.valid() && "Unexpected coalescing"); + } + if (LStop > LocMapI.stop()) { + ++LocMapI; + LocMapI.insert(LII->end, LStop, OldLocNo); + --LocMapI; + } + } + + // Advance to the next overlap. 
+    if (LII->end < LocMapI.stop()) {
+      if (++LII == LIE)
+        break;
+      LocMapI.advanceTo(LII->start);
+    } else {
+      ++LocMapI;
+      if (!LocMapI.valid())
+        break;
+      LII = LI->advanceTo(LII, LocMapI.start());
+    }
+  }
+
+  // Finally, remove any remaining OldLocNo intervals and OldLocNo itself.
+  locations.erase(locations.begin() + OldLocNo);
+  LocMapI.goToBegin();
+  while (LocMapI.valid()) {
+    unsigned v = LocMapI.value();
+    if (v == OldLocNo) {
+      DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
+                   << LocMapI.stop() << ")\n");
+      LocMapI.erase();
+    } else {
+      if (v > OldLocNo)
+        LocMapI.setValueUnchecked(v-1);
+      ++LocMapI;
+    }
+  }
+
+  DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);});
+  return DidChange;
+}
+
+bool
+UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+  bool DidChange = false;
+  // Split locations referring to OldReg. Iterate backwards so splitLocation
+  // can safely erase unused locations.
+  for (unsigned i = locations.size(); i; --i) {
+    unsigned LocNo = i-1;
+    const MachineOperand *Loc = &locations[LocNo];
+    if (!Loc->isReg() || Loc->getReg() != OldReg)
+      continue;
+    DidChange |= splitLocation(LocNo, NewRegs);
+  }
+  return DidChange;
+}
+
+void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+  bool DidChange = false;
+  for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
+    DidChange |= UV->splitRegister(OldReg, NewRegs);
+
+  if (!DidChange)
+    return;
+
+  // Map all of the new virtual registers.
+  UserValue *UV = lookupVirtReg(OldReg);
+  for (unsigned i = 0; i != NewRegs.size(); ++i)
+    mapVirtReg(NewRegs[i]->reg, UV);
+}
+
+void LiveDebugVariables::
+splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+  if (pImpl)
+    static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+  // Iterating over locations in reverse makes it easier to handle coalescing.
+  for (unsigned i = locations.size(); i; --i) {
+    unsigned LocNo = i-1;
+    MachineOperand &Loc = locations[LocNo];
+    // Only virtual registers are rewritten.
+    if (!Loc.isReg() || !Loc.getReg() ||
+        !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+      continue;
+    unsigned VirtReg = Loc.getReg();
+    if (VRM.isAssignedReg(VirtReg) &&
+        TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+      // This can create a %noreg operand in rare cases when the sub-register
+      // index is no longer available. That means the user value is in a
+      // non-existent sub-register, and %noreg is exactly what we want.
+      Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+    } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
+      // FIXME: Translate SubIdx to a stackslot offset.
+      Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+    } else {
+      Loc.setReg(0);
+      Loc.setSubReg(0);
+    }
+    coalesceLocation(LocNo);
+  }
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+                   LiveIntervals &LIS) {
+  SlotIndex Start = LIS.getMBBStartIdx(MBB);
+  Idx = Idx.getBaseIndex();
+
+  // Try to find an insert location by going backwards from Idx.
+  MachineInstr *MI;
+  while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+    // We've reached the beginning of MBB.
+ if (Idx == Start) { + MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin()); + return I; + } + Idx = Idx.getPrevIndex(); + } + + // Don't insert anything after the first terminator, though. + return MI->isTerminator() ? MBB->getFirstTerminator() : + llvm::next(MachineBasicBlock::iterator(MI)); +} + +DebugLoc UserValue::findDebugLoc() { + DebugLoc D = dl; + dl = DebugLoc(); + return D; +} +void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, + unsigned LocNo, + LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); + MachineOperand &Loc = locations[LocNo]; + ++NumInsertedDebugValues; + + // Frame index locations may require a target callback. + if (Loc.isFI()) { + MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(), + Loc.getIndex(), offset, variable, + findDebugLoc()); + if (MI) { + MBB->insert(I, MI); + return; + } + } + // This is not a frame index, or the target is happy with a standard FI. + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(Loc).addImm(offset).addMetadata(variable); +} + +void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); + + for (LocMap::const_iterator I = locInts.begin(); I.valid();) { + SlotIndex Start = I.start(); + SlotIndex Stop = I.stop(); + unsigned LocNo = I.value(); + DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); + + DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); + insertDebugValue(MBB, Start, LocNo, LIS, TII); + // This interval may span multiple basic blocks. + // Insert a DBG_VALUE into each one. + while(Stop > MBBEnd) { + // Move to the next block. + Start = MBBEnd; + if (++MBB == MFEnd) + break; + MBBEnd = LIS.getMBBEndIdx(MBB); + DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); + insertDebugValue(MBB, Start, LocNo, LIS, TII); + } + DEBUG(dbgs() << '\n'); + if (MBB == MFEnd) + break; + + ++I; + } +} + +void LDVImpl::emitDebugValues(VirtRegMap *VRM) { + DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); + userValues[i]->rewriteLocations(*VRM, *TRI); + userValues[i]->emitDebugValues(VRM, *LIS, *TII); + } + EmitDone = true; +} + +void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM); +} + + +#ifndef NDEBUG +void LiveDebugVariables::dump() { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->print(dbgs()); +} +#endif + diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h new file mode 100644 index 0000000..3ce3c39 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h @@ -0,0 +1,70 @@ +//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface to the LiveDebugVariables analysis. 
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation, new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class LiveInterval;
+class VirtRegMap;
+
+class LiveDebugVariables : public MachineFunctionPass {
+  void *pImpl;
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  LiveDebugVariables();
+  ~LiveDebugVariables();
+
+  /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+  /// @param OldReg Old virtual register that is going away.
+  /// @param NewReg New register holding the user variables.
+  /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+  ///               register.
+  void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+  /// splitRegister - Move any user variables in OldReg to the live ranges in
+  /// NewRegs where they are live. Mark the values as unavailable where no new
+  /// register is live.
+  void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+
+  /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+  /// that happened during register allocation.
+  /// @param VRM Rename virtual registers according to map.
+  void emitDebugValues(VirtRegMap *VRM);
+
+  /// dump - Print data structures to dbgs().
+  void dump();
+
+private:
+
+  virtual bool runOnMachineFunction(MachineFunction &);
+  virtual void releaseMemory();
+  virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 0000000..dccd847
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,951 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each of the machine instructions, an interval [i, j) is said
+// to be a live interval for register v if there is no instruction with number
+// j' > j such that v is live at j' and there is no instruction with number
+// i' < i such that v is live at i'. In this implementation intervals can have
+// holes, i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveInterval.h" +#include "RegisterCoalescer.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +using namespace llvm; + +LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { + // This algorithm is basically std::upper_bound. + // Unfortunately, std::upper_bound cannot be used with mixed types until we + // adopt C++0x. Many libraries can do it, but not all. + if (empty() || Pos >= endIndex()) + return end(); + iterator I = begin(); + size_t Len = ranges.size(); + do { + size_t Mid = Len >> 1; + if (Pos < I[Mid].end) + Len = Mid; + else + I += Mid + 1, Len -= Mid + 1; + } while (Len); + return I; +} + +VNInfo *LiveInterval::createDeadDef(SlotIndex Def, + VNInfo::Allocator &VNInfoAllocator) { + assert(!Def.isDead() && "Cannot define a value at the dead slot"); + iterator I = find(Def); + if (I == end()) { + VNInfo *VNI = getNextValue(Def, VNInfoAllocator); + ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + if (SlotIndex::isSameInstr(Def, I->start)) { + assert(I->valno->def == I->start && "Inconsistent existing value def"); + + // It is possible to have both normal and early-clobber defs of the same + // register on an instruction. It doesn't make a lot of sense, but it is + // possible to specify in inline assembly. + // + // Just convert everything to early-clobber. + Def = std::min(Def, I->start); + if (Def != I->start) + I->start = I->valno->def = Def; + return I->valno; + } + assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); + VNInfo *VNI = getNextValue(Def, VNInfoAllocator); + ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI)); + return VNI; +} + +// overlaps - Return true if the intersection of the two live intervals is +// not empty. +// +// An example for overlaps(): +// +// 0: A = ... +// 4: B = ... +// 8: C = A + B ;; last use of A +// +// The live intervals should look like: +// +// A = [3, 11) +// B = [7, x) +// C = [11, y) +// +// A->overlaps(C) should return false since we want to be able to join +// A and C. 
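+//
+// Note that ranges are half-open: A = [3, 11) and C = [11, y) merely touch at
+// slot 11 and do not overlap, which is what makes joining A and C legal.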
+// +bool LiveInterval::overlapsFrom(const LiveInterval& other, + const_iterator StartPos) const { + assert(!empty() && "empty interval"); + const_iterator i = begin(); + const_iterator ie = end(); + const_iterator j = StartPos; + const_iterator je = other.end(); + + assert((StartPos->start <= i->start || StartPos == other.begin()) && + StartPos != other.end() && "Bogus start position hint!"); + + if (i->start < j->start) { + i = std::upper_bound(i, ie, j->start); + if (i != ranges.begin()) --i; + } else if (j->start < i->start) { + ++StartPos; + if (StartPos != other.end() && StartPos->start <= i->start) { + assert(StartPos < other.end() && i < end()); + j = std::upper_bound(j, je, i->start); + if (j != other.ranges.begin()) --j; + } + } else { + return true; + } + + if (j == je) return false; + + while (i != ie) { + if (i->start > j->start) { + std::swap(i, j); + std::swap(ie, je); + } + + if (i->end > j->start) + return true; + ++i; + } + + return false; +} + +bool LiveInterval::overlaps(const LiveInterval &Other, + const CoalescerPair &CP, + const SlotIndexes &Indexes) const { + assert(!empty() && "empty interval"); + if (Other.empty()) + return false; + + // Use binary searches to find initial positions. + const_iterator I = find(Other.beginIndex()); + const_iterator IE = end(); + if (I == IE) + return false; + const_iterator J = Other.find(I->start); + const_iterator JE = Other.end(); + if (J == JE) + return false; + + for (;;) { + // J has just been advanced to satisfy: + assert(J->end >= I->start); + // Check for an overlap. + if (J->start < I->end) { + // I and J are overlapping. Find the later start. + SlotIndex Def = std::max(I->start, J->start); + // Allow the overlap if Def is a coalescable copy. + if (Def.isBlock() || + !CP.isCoalescable(Indexes.getInstructionFromIndex(Def))) + return true; + } + // Advance the iterator that ends first to check for more overlaps. + if (J->end > I->end) { + std::swap(I, J); + std::swap(IE, JE); + } + // Advance J until J->end >= I->start. + do + if (++J == JE) + return false; + while (J->end < I->start); + } +} + +/// overlaps - Return true if the live interval overlaps a range specified +/// by [Start, End). +bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { + assert(Start < End && "Invalid range"); + const_iterator I = std::lower_bound(begin(), end(), End); + return I != begin() && (--I)->end > Start; +} + + +/// ValNo is dead, remove it. If it is the largest value number, just nuke it +/// (and any other deleted values neighboring it), otherwise mark it as ~1U so +/// it can be nuked later. +void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { + if (ValNo->id == getNumValNums()-1) { + do { + valnos.pop_back(); + } while (!valnos.empty() && valnos.back()->isUnused()); + } else { + ValNo->markUnused(); + } +} + +/// RenumberValues - Renumber all values in order of appearance and delete the +/// remaining unused values. +void LiveInterval::RenumberValues(LiveIntervals &lis) { + SmallPtrSet<VNInfo*, 8> Seen; + valnos.clear(); + for (const_iterator I = begin(), E = end(); I != E; ++I) { + VNInfo *VNI = I->valno; + if (!Seen.insert(VNI)) + continue; + assert(!VNI->isUnused() && "Unused valno used by live range"); + VNI->id = (unsigned)valnos.size(); + valnos.push_back(VNI); + } +} + +/// extendIntervalEndTo - This method is used when we want to extend the range +/// specified by I to end at the specified endpoint. To do this, we should +/// merge and eliminate all ranges that this will overlap with. 
The iterator is +/// not invalidated. +void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { + assert(I != ranges.end() && "Not a valid interval!"); + VNInfo *ValNo = I->valno; + + // Search for the first interval that we can't merge with. + Ranges::iterator MergeTo = llvm::next(I); + for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) { + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + } + + // If NewEnd was in the middle of an interval, make sure to get its endpoint. + I->end = std::max(NewEnd, prior(MergeTo)->end); + + // If the newly formed range now touches the range after it and if they have + // the same value number, merge the two ranges into one range. + if (MergeTo != ranges.end() && MergeTo->start <= I->end && + MergeTo->valno == ValNo) { + I->end = MergeTo->end; + ++MergeTo; + } + + // Erase any dead ranges. + ranges.erase(llvm::next(I), MergeTo); +} + + +/// extendIntervalStartTo - This method is used when we want to extend the range +/// specified by I to start at the specified endpoint. To do this, we should +/// merge and eliminate all ranges that this will overlap with. +LiveInterval::Ranges::iterator +LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) { + assert(I != ranges.end() && "Not a valid interval!"); + VNInfo *ValNo = I->valno; + + // Search for the first interval that we can't merge with. + Ranges::iterator MergeTo = I; + do { + if (MergeTo == ranges.begin()) { + I->start = NewStart; + ranges.erase(MergeTo, I); + return I; + } + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + --MergeTo; + } while (NewStart <= MergeTo->start); + + // If we start in the middle of another interval, just delete a range and + // extend that interval. + if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { + MergeTo->end = I->end; + } else { + // Otherwise, extend the interval right after. + ++MergeTo; + MergeTo->start = NewStart; + MergeTo->end = I->end; + } + + ranges.erase(llvm::next(MergeTo), llvm::next(I)); + return MergeTo; +} + +LiveInterval::iterator +LiveInterval::addRangeFrom(LiveRange LR, iterator From) { + SlotIndex Start = LR.start, End = LR.end; + iterator it = std::upper_bound(From, ranges.end(), Start); + + // If the inserted interval starts in the middle or right at the end of + // another interval, just extend that interval to contain the range of LR. + if (it != ranges.begin()) { + iterator B = prior(it); + if (LR.valno == B->valno) { + if (B->start <= Start && B->end >= Start) { + extendIntervalEndTo(B, End); + return B; + } + } else { + // Check to make sure that we are not overlapping two live ranges with + // different valno's. + assert(B->end <= Start && + "Cannot overlap two LiveRanges with differing ValID's" + " (did you def the same reg twice in a MachineInstr?)"); + } + } + + // Otherwise, if this range ends in the middle of, or right next to, another + // interval, merge it into that interval. + if (it != ranges.end()) { + if (LR.valno == it->valno) { + if (it->start <= End) { + it = extendIntervalStartTo(it, Start); + + // If LR is a complete superset of an interval, we may need to grow its + // endpoint as well. + if (End > it->end) + extendIntervalEndTo(it, End); + return it; + } + } else { + // Check to make sure that we are not overlapping two live ranges with + // different valno's. 
+      assert(it->start >= End &&
+             "Cannot overlap two LiveRanges with differing ValID's");
+    }
+  }
+
+  // Otherwise, this is just a new range that doesn't interact with anything.
+  // Insert it.
+  return ranges.insert(it, LR);
+}
+
+/// extendInBlock - If this interval is live before Kill in the basic
+/// block that starts at StartIdx, extend it to be live up to Kill and return
+/// the value. If there is no live range before Kill, return NULL.
+VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
+  if (empty())
+    return 0;
+  iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot());
+  if (I == begin())
+    return 0;
+  --I;
+  if (I->end <= StartIdx)
+    return 0;
+  if (I->end < Kill)
+    extendIntervalEndTo(I, Kill);
+  return I->valno;
+}
+
+/// removeRange - Remove the specified range from this interval. Note that
+/// the range must be in a single LiveRange in its entirety.
+void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
+                               bool RemoveDeadValNo) {
+  // Find the LiveRange containing this span.
+  Ranges::iterator I = find(Start);
+  assert(I != ranges.end() && "Range is not in interval!");
+  assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
+
+  // If the span we are removing is at the start of the LiveRange, adjust it.
+  VNInfo *ValNo = I->valno;
+  if (I->start == Start) {
+    if (I->end == End) {
+      if (RemoveDeadValNo) {
+        // Check if val# is dead.
+        bool isDead = true;
+        for (const_iterator II = begin(), EE = end(); II != EE; ++II)
+          if (II != I && II->valno == ValNo) {
+            isDead = false;
+            break;
+          }
+        if (isDead) {
+          // Now that ValNo is dead, remove it.
+          markValNoForDeletion(ValNo);
+        }
+      }
+
+      ranges.erase(I);  // Removed the whole LiveRange.
+    } else
+      I->start = End;
+    return;
+  }
+
+  // Otherwise, if the span we are removing is at the end of the LiveRange,
+  // adjust the other way.
+  if (I->end == End) {
+    I->end = Start;
+    return;
+  }
+
+  // Otherwise, we are splitting the LiveRange into two pieces.
+  SlotIndex OldEnd = I->end;
+  I->end = Start;  // Trim the old interval.
+
+  // Insert the new one.
+  ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo));
+}
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from the value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+  if (empty()) return;
+  Ranges::iterator I = ranges.end();
+  Ranges::iterator E = ranges.begin();
+  do {
+    --I;
+    if (I->valno == ValNo)
+      ranges.erase(I);
+  } while (I != E);
+  // Now that ValNo is dead, remove it.
+  markValNoForDeletion(ValNo);
+}
+
+/// join - Join two live intervals (this and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other,
+                        const int *LHSValNoAssignments,
+                        const int *RHSValNoAssignments,
+                        SmallVector<VNInfo*, 16> &NewVNInfo,
+                        MachineRegisterInfo *MRI) {
+  verify();
+
+  // Determine if any of our live range values are mapped. This is uncommon, so
+  // we want to avoid the interval scan if not.
+  bool MustMapCurValNos = false;
+  unsigned NumVals = getNumValNums();
+  unsigned NumNewVals = NewVNInfo.size();
+  for (unsigned i = 0; i != NumVals; ++i) {
+    unsigned LHSValID = LHSValNoAssignments[i];
+    if (i != LHSValID ||
+        (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) {
+      MustMapCurValNos = true;
+      break;
+    }
+  }
+
+  // If we have to apply a mapping to our base interval assignment, rewrite it
+  // now.
+  if (MustMapCurValNos && !empty()) {
+    // Map the first live range.
+
+    iterator OutIt = begin();
+    OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+    for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) {
+      VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+      assert(nextValNo != 0 && "Huh?");
+
+      // If this live range has the same value # as its immediate predecessor,
+      // and if they are neighbors, remove one LiveRange. This happens when we
+      // have [0,4:0)[4,7:1) and map 0/1 onto the same value #.
+      if (OutIt->valno == nextValNo && OutIt->end == I->start) {
+        OutIt->end = I->end;
+      } else {
+        // Didn't merge. Move OutIt to the next interval.
+        ++OutIt;
+        OutIt->valno = nextValNo;
+        if (OutIt != I) {
+          OutIt->start = I->start;
+          OutIt->end = I->end;
+        }
+      }
+    }
+    // If we merge some live ranges, chop off the end.
+    ++OutIt;
+    ranges.erase(OutIt, end());
+  }
+
+  // Rewrite Other values before changing the VNInfo ids.
+  // This can leave Other in an invalid state because we're not coalescing
+  // touching segments that now have identical values. That's OK since Other is
+  // not supposed to be valid after calling join().
+  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+    I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]];
+
+  // Update val# info. Renumber them and make sure they all belong to this
+  // LiveInterval now. Also remove dead val#'s.
+  unsigned NumValNos = 0;
+  for (unsigned i = 0; i < NumNewVals; ++i) {
+    VNInfo *VNI = NewVNInfo[i];
+    if (VNI) {
+      if (NumValNos >= NumVals)
+        valnos.push_back(VNI);
+      else
+        valnos[NumValNos] = VNI;
+      VNI->id = NumValNos++;  // Renumber val#.
+    }
+  }
+  if (NumNewVals < NumVals)
+    valnos.resize(NumNewVals);  // shrinkify
+
+  // Okay, now insert the RHS live ranges into the LHS.
+  LiveRangeUpdater Updater(this);
+  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+    Updater.add(*I);
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+                                        VNInfo *LHSValNo) {
+  LiveRangeUpdater Updater(this);
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+    Updater.add(I->start, I->end, LHSValNo);
+}
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; the value numbers of any overlapped live ranges are
+/// replaced with the specified value number.
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+                                       const VNInfo *RHSValNo,
+                                       VNInfo *LHSValNo) {
+  LiveRangeUpdater Updater(this);
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+    if (I->valno == RHSValNo)
+      Updater.add(I->start, I->end, LHSValNo);
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
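+///
+/// For example, merging V1=1 into V2=0 turns [0,4:0)[4,8:1) into the single
+/// range [0,8:0), since the two ranges touch and now carry the same value.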
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { + assert(V1 != V2 && "Identical value#'s are always equivalent!"); + + // This code actually merges the (numerically) larger value number into the + // smaller value number, which is likely to allow us to compactify the value + // space. The only thing we have to be careful of is to preserve the + // instruction that defines the result value. + + // Make sure V2 is smaller than V1. + if (V1->id < V2->id) { + V1->copyFrom(*V2); + std::swap(V1, V2); + } + + // Merge V1 live ranges into V2. + for (iterator I = begin(); I != end(); ) { + iterator LR = I++; + if (LR->valno != V1) continue; // Not a V1 LiveRange. + + // Okay, we found a V1 live range. If it had a previous, touching, V2 live + // range, extend it. + if (LR != begin()) { + iterator Prev = LR-1; + if (Prev->valno == V2 && Prev->end == LR->start) { + Prev->end = LR->end; + + // Erase this live-range. + ranges.erase(LR); + I = Prev+1; + LR = Prev; + } + } + + // Okay, now we have a V1 or V2 live range that is maximally merged forward. + // Ensure that it is a V2 live-range. + LR->valno = V2; + + // If we can merge it into later V2 live ranges, do so now. We ignore any + // following V1 live ranges, as they will be merged in subsequent iterations + // of the loop. + if (I != end()) { + if (I->start == LR->end && I->valno == V2) { + LR->end = I->end; + ranges.erase(I); + I = LR+1; + } + } + } + + // Now that V1 is dead, remove it. + markValNoForDeletion(V1); + + return V2; +} + +unsigned LiveInterval::getSize() const { + unsigned Sum = 0; + for (const_iterator I = begin(), E = end(); I != E; ++I) + Sum += I->start.distance(I->end); + return Sum; +} + +raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { + return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void LiveRange::dump() const { + dbgs() << *this << "\n"; +} +#endif + +void LiveInterval::print(raw_ostream &OS) const { + if (empty()) + OS << "EMPTY"; + else { + for (LiveInterval::Ranges::const_iterator I = ranges.begin(), + E = ranges.end(); I != E; ++I) { + OS << *I; + assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo"); + } + } + + // Print value number info. 
+ if (getNumValNums()) { + OS << " "; + unsigned vnum = 0; + for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e; + ++i, ++vnum) { + const VNInfo *vni = *i; + if (vnum) OS << " "; + OS << vnum << "@"; + if (vni->isUnused()) { + OS << "x"; + } else { + OS << vni->def; + if (vni->isPHIDef()) + OS << "-phi"; + } + } + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void LiveInterval::dump() const { + dbgs() << *this << "\n"; +} +#endif + +#ifndef NDEBUG +void LiveInterval::verify() const { + for (const_iterator I = begin(), E = end(); I != E; ++I) { + assert(I->start.isValid()); + assert(I->end.isValid()); + assert(I->start < I->end); + assert(I->valno != 0); + assert(I->valno == valnos[I->valno->id]); + if (llvm::next(I) != E) { + assert(I->end <= llvm::next(I)->start); + if (I->end == llvm::next(I)->start) + assert(I->valno != llvm::next(I)->valno); + } + } +} +#endif + + +void LiveRange::print(raw_ostream &os) const { + os << *this; +} + +//===----------------------------------------------------------------------===// +// LiveRangeUpdater class +//===----------------------------------------------------------------------===// +// +// The LiveRangeUpdater class always maintains these invariants: +// +// - When LastStart is invalid, Spills is empty and the iterators are invalid. +// This is the initial state, and the state created by flush(). +// In this state, isDirty() returns false. +// +// Otherwise, segments are kept in three separate areas: +// +// 1. [begin; WriteI) at the front of LI. +// 2. [ReadI; end) at the back of LI. +// 3. Spills. +// +// - LI.begin() <= WriteI <= ReadI <= LI.end(). +// - Segments in all three areas are fully ordered and coalesced. +// - Segments in area 1 precede and can't coalesce with segments in area 2. +// - Segments in Spills precede and can't coalesce with segments in area 2. +// - No coalescing is possible between segments in Spills and segments in area +// 1, and there are no overlapping segments. +// +// The segments in Spills are not ordered with respect to the segments in area +// 1. They need to be merged. +// +// When they exist, Spills.back().start <= LastStart, +// and WriteI[-1].start <= LastStart. + +void LiveRangeUpdater::print(raw_ostream &OS) const { + if (!isDirty()) { + if (LI) + OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n'; + else + OS << "Null updater.\n"; + return; + } + assert(LI && "Can't have null LI in dirty updater."); + OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI) + << ", last start = " << LastStart + << ":\n Area 1:"; + for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I) + OS << ' ' << *I; + OS << "\n Spills:"; + for (unsigned I = 0, E = Spills.size(); I != E; ++I) + OS << ' ' << Spills[I]; + OS << "\n Area 2:"; + for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I) + OS << ' ' << *I; + OS << '\n'; +} + +void LiveRangeUpdater::dump() const +{ + print(errs()); +} + +// Determine if A and B should be coalesced. +static inline bool coalescable(const LiveRange &A, const LiveRange &B) { + assert(A.start <= B.start && "Unordered live ranges."); + if (A.end == B.start) + return A.valno == B.valno; + if (A.end < B.start) + return false; + assert(A.valno == B.valno && "Cannot overlap different values"); + return true; +} + +void LiveRangeUpdater::add(LiveRange Seg) { + assert(LI && "Cannot add to a null destination"); + + // Flush the state if Start moves backwards. 
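+  // (add() expects segments in order of increasing start position; a start
+  // that moves backwards would break the area invariants above, so the safe
+  // response is to flush everything and start over from LI->begin().)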
+ if (!LastStart.isValid() || LastStart > Seg.start) { + if (isDirty()) + flush(); + // This brings us to an uninitialized state. Reinitialize. + assert(Spills.empty() && "Leftover spilled segments"); + WriteI = ReadI = LI->begin(); + } + + // Remember start for next time. + LastStart = Seg.start; + + // Advance ReadI until it ends after Seg.start. + LiveInterval::iterator E = LI->end(); + if (ReadI != E && ReadI->end <= Seg.start) { + // First try to close the gap between WriteI and ReadI with spills. + if (ReadI != WriteI) + mergeSpills(); + // Then advance ReadI. + if (ReadI == WriteI) + ReadI = WriteI = LI->find(Seg.start); + else + while (ReadI != E && ReadI->end <= Seg.start) + *WriteI++ = *ReadI++; + } + + assert(ReadI == E || ReadI->end > Seg.start); + + // Check if the ReadI segment begins early. + if (ReadI != E && ReadI->start <= Seg.start) { + assert(ReadI->valno == Seg.valno && "Cannot overlap different values"); + // Bail if Seg is completely contained in ReadI. + if (ReadI->end >= Seg.end) + return; + // Coalesce into Seg. + Seg.start = ReadI->start; + ++ReadI; + } + + // Coalesce as much as possible from ReadI into Seg. + while (ReadI != E && coalescable(Seg, *ReadI)) { + Seg.end = std::max(Seg.end, ReadI->end); + ++ReadI; + } + + // Try coalescing Spills.back() into Seg. + if (!Spills.empty() && coalescable(Spills.back(), Seg)) { + Seg.start = Spills.back().start; + Seg.end = std::max(Spills.back().end, Seg.end); + Spills.pop_back(); + } + + // Try coalescing Seg into WriteI[-1]. + if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) { + WriteI[-1].end = std::max(WriteI[-1].end, Seg.end); + return; + } + + // Seg doesn't coalesce with anything, and needs to be inserted somewhere. + if (WriteI != ReadI) { + *WriteI++ = Seg; + return; + } + + // Finally, append to LI or Spills. + if (WriteI == E) { + LI->ranges.push_back(Seg); + WriteI = ReadI = LI->ranges.end(); + } else + Spills.push_back(Seg); +} + +// Merge as many spilled segments as possible into the gap between WriteI +// and ReadI. Advance WriteI to reflect the inserted instructions. +void LiveRangeUpdater::mergeSpills() { + // Perform a backwards merge of Spills and [SpillI;WriteI). + size_t GapSize = ReadI - WriteI; + size_t NumMoved = std::min(Spills.size(), GapSize); + LiveInterval::iterator Src = WriteI; + LiveInterval::iterator Dst = Src + NumMoved; + LiveInterval::iterator SpillSrc = Spills.end(); + LiveInterval::iterator B = LI->begin(); + + // This is the new WriteI position after merging spills. + WriteI = Dst; + + // Now merge Src and Spills backwards. + while (Src != Dst) { + if (Src != B && Src[-1].start > SpillSrc[-1].start) + *--Dst = *--Src; + else + *--Dst = *--SpillSrc; + } + assert(NumMoved == size_t(Spills.end() - SpillSrc)); + Spills.erase(SpillSrc, Spills.end()); +} + +void LiveRangeUpdater::flush() { + if (!isDirty()) + return; + // Clear the dirty state. + LastStart = SlotIndex(); + + assert(LI && "Cannot add to a null destination"); + + // Nothing to merge? + if (Spills.empty()) { + LI->ranges.erase(WriteI, ReadI); + LI->verify(); + return; + } + + // Resize the WriteI - ReadI gap to match Spills. + size_t GapSize = ReadI - WriteI; + if (GapSize < Spills.size()) { + // The gap is too small. Make some room. + size_t WritePos = WriteI - LI->begin(); + LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange()); + // This also invalidated ReadI, but it is recomputed below. + WriteI = LI->ranges.begin() + WritePos; + } else { + // Shrink the gap if necessary. 
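+    // (The gap currently spans ReadI - WriteI slots but only Spills.size()
+    // are needed, so drop the excess before merging the spilled segments in.)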
+ LI->ranges.erase(WriteI + Spills.size(), ReadI); + } + ReadI = WriteI + Spills.size(); + mergeSpills(); + LI->verify(); +} + +unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { + // Create initial equivalence classes. + EqClass.clear(); + EqClass.grow(LI->getNumValNums()); + + const VNInfo *used = 0, *unused = 0; + + // Determine connections. + for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end(); + I != E; ++I) { + const VNInfo *VNI = *I; + // Group all unused values into one class. + if (VNI->isUnused()) { + if (unused) + EqClass.join(unused->id, VNI->id); + unused = VNI; + continue; + } + used = VNI; + if (VNI->isPHIDef()) { + const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + assert(MBB && "Phi-def has no defining MBB"); + // Connect to values live out of predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) + if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI))) + EqClass.join(VNI->id, PVNI->id); + } else { + // Normal value defined by an instruction. Check for two-addr redef. + // FIXME: This could be coincidental. Should we really check for a tied + // operand constraint? + // Note that VNI->def may be a use slot for an early clobber def. + if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def)) + EqClass.join(VNI->id, UVNI->id); + } + } + + // Lump all the unused values in with the last used value. + if (used && unused) + EqClass.join(used->id, unused->id); + + EqClass.compress(); + return EqClass.getNumClasses(); +} + +void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], + MachineRegisterInfo &MRI) { + assert(LIV[0] && "LIV[0] must be set"); + LiveInterval &LI = *LIV[0]; + + // Rewrite instructions. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), + RE = MRI.reg_end(); RI != RE;) { + MachineOperand &MO = RI.getOperand(); + MachineInstr *MI = MO.getParent(); + ++RI; + // DBG_VALUE instructions should have been eliminated earlier. + LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI)); + const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); + // In the case of an <undef> use that isn't tied to any def, VNI will be + // NULL. If the use is tied to a def, VNI will be the defined value. + if (!VNI) + continue; + MO.setReg(LIV[getEqClass(VNI)]->reg); + } + + // Move runs to new intervals. + LiveInterval::iterator J = LI.begin(), E = LI.end(); + while (J != E && EqClass[J->valno->id] == 0) + ++J; + for (LiveInterval::iterator I = J; I != E; ++I) { + if (unsigned eq = EqClass[I->valno->id]) { + assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && + "New intervals should be empty"); + LIV[eq]->ranges.push_back(*I); + } else + *J++ = *I; + } + LI.ranges.erase(J, E); + + // Transfer VNInfos to their new owners and renumber them. 
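+  // Values that stay in LI are compacted towards the front and renumbered
+  // from 0; values moving to LIV[eq] are appended to their new owner's
+  // valno list and take the next id there.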
+ unsigned j = 0, e = LI.getNumValNums(); + while (j != e && EqClass[j] == 0) + ++j; + for (unsigned i = j; i != e; ++i) { + VNInfo *VNI = LI.getValNumInfo(i); + if (unsigned eq = EqClass[i]) { + VNI->id = LIV[eq]->getNumValNums(); + LIV[eq]->valnos.push_back(VNI); + } else { + VNI->id = j; + LI.valnos[j++] = VNI; + } + } + LI.valnos.resize(j); +} diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp new file mode 100644 index 0000000..f1b8394 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -0,0 +1,1172 @@ +//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveInterval analysis pass which is used +// by the Linear Scan Register allocator. This pass linearizes the +// basic blocks of the function in DFS order and uses the +// LiveVariables pass to conservatively compute live intervals for +// each virtual and physical register. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "LiveRangeCalc.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cmath> +#include <limits> +using namespace llvm; + +char LiveIntervals::ID = 0; +char &llvm::LiveIntervalsID = LiveIntervals::ID; +INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false) + +void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + // LiveVariables isn't really required by this analysis, it is only required + // here to make sure it is live during TwoAddressInstructionPass and + // PHIElimination. This is temporary. 
+  AU.addRequired<LiveVariables>();
+  AU.addPreserved<LiveVariables>();
+  AU.addPreservedID(MachineLoopInfoID);
+  AU.addRequiredTransitiveID(MachineDominatorsID);
+  AU.addPreservedID(MachineDominatorsID);
+  AU.addPreserved<SlotIndexes>();
+  AU.addRequiredTransitive<SlotIndexes>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveIntervals::LiveIntervals() : MachineFunctionPass(ID),
+  DomTree(0), LRCalc(0) {
+  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+}
+
+LiveIntervals::~LiveIntervals() {
+  delete LRCalc;
+}
+
+void LiveIntervals::releaseMemory() {
+  // Free the live intervals themselves.
+  for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
+    delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)];
+  VirtRegIntervals.clear();
+  RegMaskSlots.clear();
+  RegMaskBits.clear();
+  RegMaskBlocks.clear();
+
+  for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+    delete RegUnitIntervals[i];
+  RegUnitIntervals.clear();
+
+  // Release VNInfo memory regions; VNInfo objects don't need to be dtor'd.
+  VNInfoAllocator.Reset();
+}
+
+/// runOnMachineFunction - Compute live intervals for the whole function.
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+  MF = &fn;
+  MRI = &MF->getRegInfo();
+  TM = &fn.getTarget();
+  TRI = TM->getRegisterInfo();
+  TII = TM->getInstrInfo();
+  AA = &getAnalysis<AliasAnalysis>();
+  Indexes = &getAnalysis<SlotIndexes>();
+  DomTree = &getAnalysis<MachineDominatorTree>();
+  if (!LRCalc)
+    LRCalc = new LiveRangeCalc();
+
+  // Allocate space for all virtual registers.
+  VirtRegIntervals.resize(MRI->getNumVirtRegs());
+
+  computeVirtRegs();
+  computeRegMasks();
+  computeLiveInRegUnits();
+
+  DEBUG(dump());
+  return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
+  OS << "********** INTERVALS **********\n";
+
+  // Dump the regunits.
+  for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+    if (LiveInterval *LI = RegUnitIntervals[i])
+      OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n';
+
+  // Dump the virtregs.
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (hasInterval(Reg))
+      OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
+  }
+
+  OS << "RegMasks:";
+  for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
+    OS << ' ' << RegMaskSlots[i];
+  OS << '\n';
+
+  printInstrs(OS);
+}
+
+void LiveIntervals::printInstrs(raw_ostream &OS) const {
+  OS << "********** MACHINEINSTRS **********\n";
+  MF->print(OS, Indexes);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LiveIntervals::dumpInstrs() const {
+  printInstrs(dbgs());
+}
+#endif
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+  float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+  return new LiveInterval(reg, Weight);
+}
+
+
+/// computeVirtRegInterval - Compute the live interval of a virtual register,
+/// based on defs and uses.
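+/// The computation is two-phase: createDeadDefs() seeds a dead range at
+/// every def of the register, and extendToUses() then grows those ranges
+/// until every use is covered, inserting phi-defs where control flow joins.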
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) { + assert(LRCalc && "LRCalc not initialized."); + assert(LI->empty() && "Should only compute empty intervals."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + LRCalc->createDeadDefs(LI); + LRCalc->extendToUses(LI); +} + +void LiveIntervals::computeVirtRegs() { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + LiveInterval *LI = createInterval(Reg); + VirtRegIntervals[Reg] = LI; + computeVirtRegInterval(LI); + } +} + +void LiveIntervals::computeRegMasks() { + RegMaskBlocks.resize(MF->getNumBlockIDs()); + + // Find all instructions with regmask operands. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; + RMB.first = RegMaskSlots.size(); + for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end(); + MI != ME; ++MI) + for (MIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isRegMask()) + continue; + RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); + RegMaskBits.push_back(MO->getRegMask()); + } + // Compute the number of register mask instructions in this block. + RMB.second = RegMaskSlots.size() - RMB.first; + } +} + +//===----------------------------------------------------------------------===// +// Register Unit Liveness +//===----------------------------------------------------------------------===// +// +// Fixed interference typically comes from ABI boundaries: Function arguments +// and return values are passed in fixed registers, and so are exception +// pointers entering landing pads. Certain instructions require values to be +// present in specific registers. That is also represented through fixed +// interference. +// + +/// computeRegUnitInterval - Compute the live interval of a register unit, based +/// on the uses and defs of aliasing registers. The interval should be empty, +/// or contain only dead phi-defs from ABI blocks. +void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { + unsigned Unit = LI->reg; + + assert(LRCalc && "LRCalc not initialized."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + + // The physregs aliasing Unit are the roots and their super-registers. + // Create all values as dead defs before extending to uses. Note that roots + // may share super-registers. That's OK because createDeadDefs() is + // idempotent. It is very rare for a register unit to have multiple roots, so + // uniquing super-registers is probably not worthwhile. + for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { + unsigned Root = *Roots; + if (!MRI->reg_empty(Root)) + LRCalc->createDeadDefs(LI, Root); + for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + if (!MRI->reg_empty(*Supers)) + LRCalc->createDeadDefs(LI, *Supers); + } + } + + // Now extend LI to reach all uses. + // Ignore uses of reserved registers. We only track defs of those. 
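+  // (A reserved register is assumed live throughout, so its uses add no
+  // information to the range; recording the defs is sufficient.)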
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { + unsigned Root = *Roots; + if (!MRI->isReserved(Root) && !MRI->reg_empty(Root)) + LRCalc->extendToUses(LI, Root); + for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + unsigned Reg = *Supers; + if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg)) + LRCalc->extendToUses(LI, Reg); + } + } +} + + +/// computeLiveInRegUnits - Precompute the live ranges of any register units +/// that are live-in to an ABI block somewhere. Register values can appear +/// without a corresponding def when entering the entry block or a landing pad. +/// +void LiveIntervals::computeLiveInRegUnits() { + RegUnitIntervals.resize(TRI->getNumRegUnits()); + DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); + + // Keep track of the intervals allocated. + SmallVector<LiveInterval*, 8> NewIntvs; + + // Check all basic blocks for live-ins. + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock *MBB = MFI; + + // We only care about ABI blocks: Entry + landing pads. + if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty()) + continue; + + // Create phi-defs at Begin for all live-in registers. + SlotIndex Begin = Indexes->getMBBStartIdx(MBB); + DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber()); + for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(), + LIE = MBB->livein_end(); LII != LIE; ++LII) { + for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) { + unsigned Unit = *Units; + LiveInterval *Intv = RegUnitIntervals[Unit]; + if (!Intv) { + Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF); + NewIntvs.push_back(Intv); + } + VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator()); + (void)VNI; + DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id); + } + } + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n"); + + // Compute the 'normal' part of the intervals. + for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i) + computeRegUnitInterval(NewIntvs[i]); +} + + +/// shrinkToUses - After removing some uses of a register, shrink its live +/// range to just the remaining uses. This method does not compute reaching +/// defs for new uses, and it doesn't remove dead defs. +bool LiveIntervals::shrinkToUses(LiveInterval *li, + SmallVectorImpl<MachineInstr*> *dead) { + DEBUG(dbgs() << "Shrink: " << *li << '\n'); + assert(TargetRegisterInfo::isVirtualRegister(li->reg) + && "Can only shrink virtual registers"); + // Find all the values used, including PHI kills. + SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList; + + // Blocks that have already been added to WorkList as live-out. + SmallPtrSet<MachineBasicBlock*, 16> LiveOut; + + // Visit all instructions reading li->reg. + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(li->reg); + MachineInstr *UseMI = I.skipInstruction();) { + if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) + continue; + SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + LiveRangeQuery LRQ(*li, Idx); + VNInfo *VNI = LRQ.valueIn(); + if (!VNI) { + // This shouldn't happen: readsVirtualRegister returns true, but there is + // no live value. It is likely caused by a target getting <undef> flags + // wrong. 
+ DEBUG(dbgs() << Idx << '\t' << *UseMI + << "Warning: Instr claims to read non-existent value in " + << *li << '\n'); + continue; + } + // Special case: An early-clobber tied operand reads and writes the + // register one slot early. + if (VNInfo *DefVNI = LRQ.valueDefined()) + Idx = DefVNI->def; + + WorkList.push_back(std::make_pair(Idx, VNI)); + } + + // Create a new live interval with only minimal live segments per def. + LiveInterval NewLI(li->reg, 0); + for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); + I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); + } + + // Keep track of the PHIs that are in use. + SmallPtrSet<VNInfo*, 8> UsedPHIs; + + // Extend intervals to reach all uses in WorkList. + while (!WorkList.empty()) { + SlotIndex Idx = WorkList.back().first; + VNInfo *VNI = WorkList.back().second; + WorkList.pop_back(); + const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot()); + SlotIndex BlockStart = getMBBStartIdx(MBB); + + // Extend the live range for VNI to be live at Idx. + if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { + (void)ExtVNI; + assert(ExtVNI == VNI && "Unexpected existing value number"); + // Is this a PHIDef we haven't seen before? + if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI)) + continue; + // The PHI is live, make sure the predecessors are live-out. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + if (!LiveOut.insert(*PI)) + continue; + SlotIndex Stop = getMBBEndIdx(*PI); + // A predecessor is not required to have a live-out value for a PHI. + if (VNInfo *PVNI = li->getVNInfoBefore(Stop)) + WorkList.push_back(std::make_pair(Stop, PVNI)); + } + continue; + } + + // VNI is live-in to MBB. + DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); + NewLI.addRange(LiveRange(BlockStart, Idx, VNI)); + + // Make sure VNI is live-out from the predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + if (!LiveOut.insert(*PI)) + continue; + SlotIndex Stop = getMBBEndIdx(*PI); + assert(li->getVNInfoBefore(Stop) == VNI && + "Wrong value out of predecessor"); + WorkList.push_back(std::make_pair(Stop, VNI)); + } + } + + // Handle dead values. + bool CanSeparate = false; + for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); + I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); + assert(LII != NewLI.end() && "Missing live range for PHI"); + if (LII->end != VNI->def.getDeadSlot()) + continue; + if (VNI->isPHIDef()) { + // This is a dead PHI. Remove it. + VNI->markUnused(); + NewLI.removeRange(*LII); + DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); + CanSeparate = true; + } else { + // This is a dead def. Make sure the instruction knows. + MachineInstr *MI = getInstructionFromIndex(VNI->def); + assert(MI && "No instruction defining live value"); + MI->addRegisterDead(li->reg, TRI); + if (dead && MI->allDefsAreDead()) { + DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI); + dead->push_back(MI); + } + } + } + + // Move the trimmed ranges back. 
+ li->ranges.swap(NewLI.ranges); + DEBUG(dbgs() << "Shrunk: " << *li << '\n'); + return CanSeparate; +} + +void LiveIntervals::extendToIndices(LiveInterval *LI, + ArrayRef<SlotIndex> Indices) { + assert(LRCalc && "LRCalc not initialized."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + for (unsigned i = 0, e = Indices.size(); i != e; ++i) + LRCalc->extend(LI, Indices[i]); +} + +void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, + SmallVectorImpl<SlotIndex> *EndPoints) { + LiveRangeQuery LRQ(*LI, Kill); + VNInfo *VNI = LRQ.valueOut(); + if (!VNI) + return; + + MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill); + SlotIndex MBBStart, MBBEnd; + tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB); + + // If VNI isn't live out from KillMBB, the value is trivially pruned. + if (LRQ.endPoint() < MBBEnd) { + LI->removeRange(Kill, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + return; + } + + // VNI is live out of KillMBB. + LI->removeRange(Kill, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + + // Find all blocks that are reachable from KillMBB without leaving VNI's live + // range. It is possible that KillMBB itself is reachable, so start a DFS + // from each successor. + typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy; + VisitedTy Visited; + for (MachineBasicBlock::succ_iterator + SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end(); + SuccI != SuccE; ++SuccI) { + for (df_ext_iterator<MachineBasicBlock*, VisitedTy> + I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited); + I != E;) { + MachineBasicBlock *MBB = *I; + + // Check if VNI is live in to MBB. + tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); + LiveRangeQuery LRQ(*LI, MBBStart); + if (LRQ.valueIn() != VNI) { + // This block isn't part of the VNI live range. Prune the search. + I.skipChildren(); + continue; + } + + // Prune the search if VNI is killed in MBB. + if (LRQ.endPoint() < MBBEnd) { + LI->removeRange(MBBStart, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + I.skipChildren(); + continue; + } + + // VNI is live through MBB. + LI->removeRange(MBBStart, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + ++I; + } + } +} + +//===----------------------------------------------------------------------===// +// Register allocator hooks. +// + +void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { + // Keep track of regunit ranges. + SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU; + + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + LiveInterval *LI = &getInterval(Reg); + if (LI->empty()) + continue; + + // Find the regunit intervals for the assigned register. They may overlap + // the virtual register live range, cancelling any kills. + RU.clear(); + for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); + ++Units) { + LiveInterval *RUInt = &getRegUnit(*Units); + if (RUInt->empty()) + continue; + RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end))); + } + + // Every instruction that kills Reg corresponds to a live range end point. + for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; + ++RI) { + // A block index indicates an MBB edge. 
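+      // Such an end point is not attached to any instruction, so there is no
+      // kill flag to update; skip it.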
+      if (RI->end.isBlock())
+        continue;
+      MachineInstr *MI = getInstructionFromIndex(RI->end);
+      if (!MI)
+        continue;
+
+      // Check if any of the regunits are live beyond the end of RI. That could
+      // happen when a physreg is defined as a copy of a virtreg:
+      //
+      //   %EAX = COPY %vreg5
+      //   FOO %vreg5         <--- MI, cancel kill because %EAX is live.
+      //   BAR %EAX<kill>
+      //
+      // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
+      bool CancelKill = false;
+      for (unsigned u = 0, e = RU.size(); u != e; ++u) {
+        LiveInterval *RInt = RU[u].first;
+        LiveInterval::iterator &I = RU[u].second;
+        if (I == RInt->end())
+          continue;
+        I = RInt->advanceTo(I, RI->end);
+        if (I == RInt->end() || I->start >= RI->end)
+          continue;
+        // I is overlapping RI.
+        CancelKill = true;
+        break;
+      }
+      if (CancelKill)
+        MI->clearRegisterKills(Reg, NULL);
+      else
+        MI->addRegisterKilled(Reg, NULL);
+    }
+  }
+}
+
+MachineBasicBlock*
+LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+  // A local live range must be fully contained inside the block, meaning it is
+  // defined and killed at instructions, not at block boundaries. It is not
+  // live in or out of any block.
+  //
+  // It is technically possible to have a PHI-defined live range identical to a
+  // single block, but we are going to return false in that case.
+
+  SlotIndex Start = LI.beginIndex();
+  if (Start.isBlock())
+    return NULL;
+
+  SlotIndex Stop = LI.endIndex();
+  if (Stop.isBlock())
+    return NULL;
+
+  // getMBBFromIndex doesn't need to search the MBB table when both indexes
+  // belong to proper instructions.
+  MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start);
+  MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop);
+  return MBB1 == MBB2 ? MBB1 : NULL;
+}
+
+bool
+LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
+  for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+       I != E; ++I) {
+    const VNInfo *PHI = *I;
+    if (PHI->isUnused() || !PHI->isPHIDef())
+      continue;
+    const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
+    // Conservatively return true instead of scanning huge predecessor lists.
+    if (PHIMBB->pred_size() > 100)
+      return true;
+    for (MachineBasicBlock::const_pred_iterator
+         PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
+      if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+        return true;
+  }
+  return false;
+}
+
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+  // Limit the loop depth ridiculousness.
+  if (loopDepth > 200)
+    loopDepth = 200;
+
+  // The loop depth is used to roughly estimate the number of times the
+  // instruction is executed. Something like 10^d is simple, but will quickly
+  // overflow a float. This expression behaves like 10^d for small d, but is
+  // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+  // headroom before overflow.
+  // powf() might be unavailable on some platforms, so for consistency we use
+  // pow(double, double) instead.
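+  // A few sample values of (1 + 100/(d+10))^d:
+  //   d=0 -> 1,  d=1 -> ~10,  d=2 -> ~87,  d=3 -> ~657,  d=200 -> ~6.7e33.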
+  float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
+
+  return (isDef + isUse) * lc;
+}
+
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+                                                  MachineInstr* startInst) {
+  LiveInterval& Interval = getOrCreateInterval(reg);
+  VNInfo* VN = Interval.getNextValue(
+    SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+    getVNInfoAllocator());
+  LiveRange LR(
+    SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+    getMBBEndIdx(startInst->getParent()), VN);
+  Interval.addRange(LR);
+
+  return LR;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                           Register mask functions
+//===----------------------------------------------------------------------===//
+
+bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+                                             BitVector &UsableRegs) {
+  if (LI.empty())
+    return false;
+  LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+
+  // Use smaller arrays for local live ranges.
+  ArrayRef<SlotIndex> Slots;
+  ArrayRef<const uint32_t*> Bits;
+  if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) {
+    Slots = getRegMaskSlotsInBlock(MBB->getNumber());
+    Bits = getRegMaskBitsInBlock(MBB->getNumber());
+  } else {
+    Slots = getRegMaskSlots();
+    Bits = getRegMaskBits();
+  }
+
+  // We are going to enumerate all the register mask slots contained in LI.
+  // Start with a binary search of RegMaskSlots to find a starting point.
+  ArrayRef<SlotIndex>::iterator SlotI =
+    std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+  ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+  // No slots in range, LI begins after the last call.
+  if (SlotI == SlotE)
+    return false;
+
+  bool Found = false;
+  for (;;) {
+    assert(*SlotI >= LiveI->start);
+    // Loop over all slots overlapping this segment.
+    while (*SlotI < LiveI->end) {
+      // *SlotI overlaps LI. Collect mask bits.
+      if (!Found) {
+        // This is the first overlap. Initialize UsableRegs to all ones.
+        UsableRegs.clear();
+        UsableRegs.resize(TRI->getNumRegs(), true);
+        Found = true;
+      }
+      // Remove usable registers clobbered by this mask.
+      UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]);
+      if (++SlotI == SlotE)
+        return Found;
+    }
+    // *SlotI is beyond the current LI segment.
+    LiveI = LI.advanceTo(LiveI, *SlotI);
+    if (LiveI == LiveE)
+      return Found;
+    // Advance SlotI until it overlaps.
+    while (*SlotI < LiveI->start)
+      if (++SlotI == SlotE)
+        return Found;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//                              HMEditor class.
+//===----------------------------------------------------------------------===//
+
+// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+class LiveIntervals::HMEditor {
+private:
+  LiveIntervals& LIS;
+  const MachineRegisterInfo& MRI;
+  const TargetRegisterInfo& TRI;
+  SlotIndex OldIdx;
+  SlotIndex NewIdx;
+  SmallPtrSet<LiveInterval*, 8> Updated;
+  bool UpdateFlags;
+
+public:
+  HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
+           const TargetRegisterInfo& TRI,
+           SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags)
+    : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx),
+      UpdateFlags(UpdateFlags) {}
+
+  // FIXME: UpdateFlags is a workaround that creates live intervals for all
+  // physregs, even those that aren't needed for regalloc, in order to update
+  // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
+  // flags, and postRA passes will use a live register utility instead.
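+  // Return the regunit's live interval. When kill flags must be kept up to
+  // date, compute the interval on demand; otherwise only return one that has
+  // already been cached.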
+ LiveInterval *getRegUnitLI(unsigned Unit) { + if (UpdateFlags) + return &LIS.getRegUnit(Unit); + return LIS.getCachedRegUnit(Unit); + } + + /// Update all live ranges touched by MI, assuming a move from OldIdx to + /// NewIdx. + void updateAllRanges(MachineInstr *MI) { + DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI); + bool hasRegMask = false; + for (MIOperands MO(MI); MO.isValid(); ++MO) { + if (MO->isRegMask()) + hasRegMask = true; + if (!MO->isReg()) + continue; + // Aggressively clear all kill flags. + // They are reinserted by VirtRegRewriter. + if (MO->isUse()) + MO->setIsKill(false); + + unsigned Reg = MO->getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + updateRange(LIS.getInterval(Reg)); + continue; + } + + // For physregs, only update the regunits that actually have a + // precomputed live range. + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + if (LiveInterval *LI = getRegUnitLI(*Units)) + updateRange(*LI); + } + if (hasRegMask) + updateRegMaskSlots(); + } + +private: + /// Update a single live range, assuming an instruction has been moved from + /// OldIdx to NewIdx. + void updateRange(LiveInterval &LI) { + if (!Updated.insert(&LI)) + return; + DEBUG({ + dbgs() << " "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + dbgs() << PrintReg(LI.reg); + else + dbgs() << PrintRegUnit(LI.reg, &TRI); + dbgs() << ":\t" << LI << '\n'; + }); + if (SlotIndex::isEarlierInstr(OldIdx, NewIdx)) + handleMoveDown(LI); + else + handleMoveUp(LI); + DEBUG(dbgs() << " -->\t" << LI << '\n'); + LI.verify(); + } + + /// Update LI to reflect an instruction has been moved downwards from OldIdx + /// to NewIdx. + /// + /// 1. Live def at OldIdx: + /// Move def to NewIdx, assert endpoint after NewIdx. + /// + /// 2. Live def at OldIdx, killed at NewIdx: + /// Change to dead def at NewIdx. + /// (Happens when bundling def+kill together). + /// + /// 3. Dead def at OldIdx: + /// Move def to NewIdx, possibly across another live value. + /// + /// 4. Def at OldIdx AND at NewIdx: + /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx. + /// (Happens when bundling multiple defs together). + /// + /// 5. Value read at OldIdx, killed before NewIdx: + /// Extend kill to NewIdx. + /// + void handleMoveDown(LiveInterval &LI) { + // First look for a kill at OldIdx. + LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); + LiveInterval::iterator E = LI.end(); + // Is LI even live at OldIdx? + if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + return; + + // Handle a live-in value. + if (!SlotIndex::isSameInstr(I->start, OldIdx)) { + bool isKill = SlotIndex::isSameInstr(OldIdx, I->end); + // If the live-in value already extends to NewIdx, there is nothing to do. + if (!SlotIndex::isEarlierInstr(I->end, NewIdx)) + return; + // Aggressively remove all kill flags from the old kill point. + // Kill flags shouldn't be used while live intervals exist, they will be + // reinserted by VirtRegRewriter. + if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end)) + for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) + if (MO->isReg() && MO->isUse()) + MO->setIsKill(false); + // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by + // overlapping ranges. Case 5 above. + I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); + // If this was a kill, there may also be a def. Otherwise we're done. + if (!isKill) + return; + ++I; + } + + // Check for a def at OldIdx. 
+ if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start)) + return; + // We have a def at OldIdx. + VNInfo *DefVNI = I->valno; + assert(DefVNI->def == I->start && "Inconsistent def"); + DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); + // If the defined value extends beyond NewIdx, just move the def down. + // This is case 1 above. + if (SlotIndex::isEarlierInstr(NewIdx, I->end)) { + I->start = DefVNI->def; + return; + } + // The remaining possibilities are now: + // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx). + // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot(). + // In either case, it is possible that there is an existing def at NewIdx. + assert((I->end == OldIdx.getDeadSlot() || + SlotIndex::isSameInstr(I->end, NewIdx)) && + "Cannot move def below kill"); + LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot()); + if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) { + // There is an existing def at NewIdx, case 4 above. The def at OldIdx is + // coalesced into that value. + assert(NewI->valno != DefVNI && "Multiple defs of value?"); + LI.removeValNo(DefVNI); + return; + } + // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx. + // If the def at OldIdx was dead, we allow it to be moved across other LI + // values. The new range should be placed immediately before NewI, move any + // intermediate ranges up. + assert(NewI != I && "Inconsistent iterators"); + std::copy(llvm::next(I), NewI, I); + *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + } + + /// Update LI to reflect an instruction has been moved upwards from OldIdx + /// to NewIdx. + /// + /// 1. Live def at OldIdx: + /// Hoist def to NewIdx. + /// + /// 2. Dead def at OldIdx: + /// Hoist def+end to NewIdx, possibly move across other values. + /// + /// 3. Dead def at OldIdx AND existing def at NewIdx: + /// Remove value defined at OldIdx, coalescing it with existing value. + /// + /// 4. Live def at OldIdx AND existing def at NewIdx: + /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx. + /// (Happens when bundling multiple defs together). + /// + /// 5. Value killed at OldIdx: + /// Hoist kill to NewIdx, then scan for last kill between NewIdx and + /// OldIdx. + /// + void handleMoveUp(LiveInterval &LI) { + // First look for a kill at OldIdx. + LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); + LiveInterval::iterator E = LI.end(); + // Is LI even live at OldIdx? + if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + return; + + // Handle a live-in value. + if (!SlotIndex::isSameInstr(I->start, OldIdx)) { + // If the live-in value isn't killed here, there is nothing to do. + if (!SlotIndex::isSameInstr(OldIdx, I->end)) + return; + // Adjust I->end to end at NewIdx. If we are hoisting a kill above + // another use, we need to search for that use. Case 5 above. + I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); + ++I; + // If OldIdx also defines a value, there couldn't have been another use. + if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { + // No def, search for the new kill. + // This can never be an early clobber kill since there is no def. + llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot(); + return; + } + } + + // Now deal with the def at OldIdx. 
+ assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?"); + VNInfo *DefVNI = I->valno; + assert(DefVNI->def == I->start && "Inconsistent def"); + DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); + + // Check for an existing def at NewIdx. + LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot()); + if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { + assert(NewI->valno != DefVNI && "Same value defined more than once?"); + // There is an existing def at NewIdx. + if (I->end.isDead()) { + // Case 3: Remove the dead def at OldIdx. + LI.removeValNo(DefVNI); + return; + } + // Case 4: Replace def at NewIdx with live def at OldIdx. + I->start = DefVNI->def; + LI.removeValNo(NewI->valno); + return; + } + + // There is no existing def at NewIdx. Hoist DefVNI. + if (!I->end.isDead()) { + // Leave the end point of a live def. + I->start = DefVNI->def; + return; + } + + // DefVNI is a dead def. It may have been moved across other values in LI, + // so move I up to NewI. Slide [NewI;I) down one position. + std::copy_backward(NewI, I, llvm::next(I)); + *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + } + + void updateRegMaskSlots() { + SmallVectorImpl<SlotIndex>::iterator RI = + std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), + OldIdx); + assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() && + "No RegMask at OldIdx."); + *RI = NewIdx.getRegSlot(); + assert((RI == LIS.RegMaskSlots.begin() || + SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) && + "Cannot move regmask instruction above another call"); + assert((llvm::next(RI) == LIS.RegMaskSlots.end() || + SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) && + "Cannot move regmask instruction below another call"); + } + + // Return the last use of reg between NewIdx and OldIdx. + SlotIndex findLastUseBefore(unsigned Reg) { + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + SlotIndex LastUse = NewIdx; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(Reg), + UE = MRI.use_nodbg_end(); + UI != UE; UI.skipInstruction()) { + const MachineInstr* MI = &*UI; + SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); + if (InstSlot > LastUse && InstSlot < OldIdx) + LastUse = InstSlot; + } + return LastUse; + } + + // This is a regunit interval, so scanning the use list could be very + // expensive. Scan upwards from OldIdx instead. + assert(NewIdx < OldIdx && "Expected upwards move"); + SlotIndexes *Indexes = LIS.getSlotIndexes(); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx); + + // OldIdx may not correspond to an instruction any longer, so set MII to + // point to the next instruction after OldIdx, or MBB->end(). + MachineBasicBlock::iterator MII = MBB->end(); + if (MachineInstr *MI = Indexes->getInstructionFromIndex( + Indexes->getNextNonNullIndex(OldIdx))) + if (MI->getParent() == MBB) + MII = MI; + + MachineBasicBlock::iterator Begin = MBB->begin(); + while (MII != Begin) { + if ((--MII)->isDebugValue()) + continue; + SlotIndex Idx = Indexes->getInstructionIndex(MII); + + // Stop searching when NewIdx is reached. + if (!SlotIndex::isEarlierInstr(NewIdx, Idx)) + return NewIdx; + + // Check if MII uses Reg. + for (MIBundleOperands MO(MII); MO.isValid(); ++MO) + if (MO->isReg() && + TargetRegisterInfo::isPhysicalRegister(MO->getReg()) && + TRI.hasRegUnit(MO->getReg(), Reg)) + return Idx; + } + // Didn't reach NewIdx. It must be the first instruction in the block. 
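+    // With no other use between NewIdx and OldIdx, the moved instruction
+    // itself is the last use of the register unit, so NewIdx is the answer.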
+ return NewIdx; + } +}; + +void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) { + assert(!MI->isBundled() && "Can't handle bundled instructions yet."); + SlotIndex OldIndex = Indexes->getInstructionIndex(MI); + Indexes->removeMachineInstrFromMaps(MI); + SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI); + assert(getMBBStartIdx(MI->getParent()) <= OldIndex && + OldIndex < getMBBEndIdx(MI->getParent()) && + "Cannot handle moves across basic block boundaries."); + + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(MI); +} + +void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, + MachineInstr* BundleStart, + bool UpdateFlags) { + SlotIndex OldIndex = Indexes->getInstructionIndex(MI); + SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart); + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(MI); +} + +void +LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + ArrayRef<unsigned> OrigRegs) { + // Find anchor points, which are at the beginning/end of blocks or at + // instructions that already have indexes. + while (Begin != MBB->begin() && !Indexes->hasIndex(Begin)) + --Begin; + while (End != MBB->end() && !Indexes->hasIndex(End)) + ++End; + + SlotIndex endIdx; + if (End == MBB->end()) + endIdx = getMBBEndIdx(MBB).getPrevSlot(); + else + endIdx = getInstructionIndex(End); + + Indexes->repairIndexesInRange(MBB, Begin, End); + + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg() && + TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && + !hasInterval(MOI->getReg())) { + LiveInterval &LI = getOrCreateInterval(MOI->getReg()); + computeVirtRegInterval(&LI); + } + } + } + + for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) { + unsigned Reg = OrigRegs[i]; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + LiveInterval &LI = getInterval(Reg); + // FIXME: Should we support undefs that gain defs? + if (!LI.hasAtLeastOneValue()) + continue; + + LiveInterval::iterator LII = LI.find(endIdx); + SlotIndex lastUseIdx; + if (LII != LI.end() && LII->start < endIdx) + lastUseIdx = LII->end; + else + --LII; + + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + + SlotIndex instrIdx = getInstructionIndex(MI); + bool isStartValid = getInstructionFromIndex(LII->start); + bool isEndValid = getInstructionFromIndex(LII->end); + + // FIXME: This doesn't currently handle early-clobber or multiple removed + // defs inside of the region to repair. + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + const MachineOperand &MO = *OI; + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + if (MO.isDef()) { + if (!isStartValid) { + if (LII->end.isDead()) { + SlotIndex prevStart; + if (LII != LI.begin()) + prevStart = llvm::prior(LII)->start; + + // FIXME: This could be more efficient if there was a removeRange + // method that returned an iterator. 
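+              // removeRange() invalidates LII, so it is re-found below from
+              // the saved prevStart, or from LI.begin() if there was no
+              // previous segment.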
+ LI.removeRange(*LII, true); + if (prevStart.isValid()) + LII = LI.find(prevStart); + else + LII = LI.begin(); + } else { + LII->start = instrIdx.getRegSlot(); + LII->valno->def = instrIdx.getRegSlot(); + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + continue; + } + } + + if (!lastUseIdx.isValid()) { + VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), + VNInfoAllocator); + LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI); + LII = LI.addRange(LR); + } else if (LII->start != instrIdx.getRegSlot()) { + VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), + VNInfoAllocator); + LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI); + LII = LI.addRange(LR); + } + + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + } else if (MO.isUse()) { + // FIXME: This should probably be handled outside of this branch, + // either as part of the def case (for defs inside of the region) or + // after the loop over the region. + if (!isEndValid && !LII->end.isBlock()) + LII->end = instrIdx.getRegSlot(); + if (!lastUseIdx.isValid()) + lastUseIdx = instrIdx.getRegSlot(); + } + } + } + } +} diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp new file mode 100644 index 0000000..d5a81a3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -0,0 +1,204 @@ +//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LiveIntervalUnion represents a coalesced set of live intervals. This may be +// used during coalescing to represent a congruence class, or during register +// allocation to model liveness of a physical register. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> + +using namespace llvm; + + +// Merge a LiveInterval's segments. Guarantee no overlaps. +void LiveIntervalUnion::unify(LiveInterval &VirtReg) { + if (VirtReg.empty()) + return; + ++Tag; + + // Insert each of the virtual register's live segments into the map. + LiveInterval::iterator RegPos = VirtReg.begin(); + LiveInterval::iterator RegEnd = VirtReg.end(); + SegmentIter SegPos = Segments.find(RegPos->start); + + while (SegPos.valid()) { + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); + if (++RegPos == RegEnd) + return; + SegPos.advanceTo(RegPos->start); + } + + // We have reached the end of Segments, so it is no longer necessary to search + // for the insertion position. + // It is faster to insert the end first. + --RegEnd; + SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg); + for (; RegPos != RegEnd; ++RegPos, ++SegPos) + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); +} + +// Remove a live virtual register's segments from this union. +void LiveIntervalUnion::extract(LiveInterval &VirtReg) { + if (VirtReg.empty()) + return; + ++Tag; + + // Remove each of the virtual register's live segments from the map. 
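+  // unify() inserted each segment with &VirtReg as its mapped value, so the
+  // interval and the union can be walked in lockstep here.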
+ LiveInterval::iterator RegPos = VirtReg.begin(); + LiveInterval::iterator RegEnd = VirtReg.end(); + SegmentIter SegPos = Segments.find(RegPos->start); + + for (;;) { + assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval"); + SegPos.erase(); + if (!SegPos.valid()) + return; + + // Skip all segments that may have been coalesced. + RegPos = VirtReg.advanceTo(RegPos, SegPos.start()); + if (RegPos == RegEnd) + return; + + SegPos.advanceTo(RegPos->start); + } +} + +void +LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { + if (empty()) { + OS << " empty\n"; + return; + } + for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) { + OS << " [" << SI.start() << ' ' << SI.stop() << "):" + << PrintReg(SI.value()->reg, TRI); + } + OS << '\n'; +} + +#ifndef NDEBUG +// Verify the live intervals in this union and add them to the visited set. +void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { + for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI) + VisitedVRegs.set(SI.value()->reg); +} +#endif //!NDEBUG + +// Scan the vector of interfering virtual registers in this union. Assume it's +// quite small. +bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { + SmallVectorImpl<LiveInterval*>::const_iterator I = + std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg); + return I != InterferingVRegs.end(); +} + +// Collect virtual registers in this union that interfere with this +// query's live virtual register. +// +// The query state is one of: +// +// 1. CheckedFirstInterference == false: Iterators are uninitialized. +// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused. +// 3. Iterators left at the last seen intersection. +// +unsigned LiveIntervalUnion::Query:: +collectInterferingVRegs(unsigned MaxInterferingRegs) { + // Fast path return if we already have the desired information. + if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs) + return InterferingVRegs.size(); + + // Set up iterators on the first call. + if (!CheckedFirstInterference) { + CheckedFirstInterference = true; + + // Quickly skip interference check for empty sets. + if (VirtReg->empty() || LiveUnion->empty()) { + SeenAllInterferences = true; + return 0; + } + + // In most cases, the union will start before VirtReg. + VirtRegI = VirtReg->begin(); + LiveUnionI.setMap(LiveUnion->getMap()); + LiveUnionI.find(VirtRegI->start); + } + + LiveInterval::iterator VirtRegEnd = VirtReg->end(); + LiveInterval *RecentReg = 0; + while (LiveUnionI.valid()) { + assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg"); + + // Check for overlapping interference. + while (VirtRegI->start < LiveUnionI.stop() && + VirtRegI->end > LiveUnionI.start()) { + // This is an overlap, record the interfering register. + LiveInterval *VReg = LiveUnionI.value(); + if (VReg != RecentReg && !isSeenInterference(VReg)) { + RecentReg = VReg; + InterferingVRegs.push_back(VReg); + if (InterferingVRegs.size() >= MaxInterferingRegs) + return InterferingVRegs.size(); + } + // This LiveUnion segment is no longer interesting. + if (!(++LiveUnionI).valid()) { + SeenAllInterferences = true; + return InterferingVRegs.size(); + } + } + + // The iterators are now not overlapping, LiveUnionI has been advanced + // beyond VirtRegI. + assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap"); + + // Advance the iterator that ends first. 
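+    // (Only VirtRegI can be behind at this point, so catching it up to
+    // LiveUnionI.start() may expose the next overlap.)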
+ VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start()); + if (VirtRegI == VirtRegEnd) + break; + + // Detect overlap, handle above. + if (VirtRegI->start < LiveUnionI.stop()) + continue; + + // Still not overlapping. Catch up LiveUnionI. + LiveUnionI.advanceTo(VirtRegI->start); + } + SeenAllInterferences = true; + return InterferingVRegs.size(); +} + +void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc, + unsigned NSize) { + // Reuse existing allocation. + if (NSize == Size) + return; + clear(); + Size = NSize; + LIUs = static_cast<LiveIntervalUnion*>( + malloc(sizeof(LiveIntervalUnion)*NSize)); + for (unsigned i = 0; i != Size; ++i) + new(LIUs + i) LiveIntervalUnion(Alloc); +} + +void LiveIntervalUnion::Array::clear() { + if (!LIUs) + return; + for (unsigned i = 0; i != Size; ++i) + LIUs[i].~LiveIntervalUnion(); + free(LIUs); + Size = 0; + LIUs = 0; +} diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp new file mode 100644 index 0000000..dede490 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -0,0 +1,380 @@ +//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the LiveRangeCalc class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "LiveRangeCalc.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +void LiveRangeCalc::reset(const MachineFunction *mf, + SlotIndexes *SI, + MachineDominatorTree *MDT, + VNInfo::Allocator *VNIA) { + MF = mf; + MRI = &MF->getRegInfo(); + Indexes = SI; + DomTree = MDT; + Alloc = VNIA; + + unsigned N = MF->getNumBlockIDs(); + Seen.clear(); + Seen.resize(N); + LiveOut.resize(N); + LiveIn.clear(); +} + + +void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { + assert(MRI && Indexes && "call reset() first"); + + // Visit all def operands. If the same instruction has multiple defs of Reg, + // LI->createDeadDef() will deduplicate. + for (MachineRegisterInfo::def_iterator + I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) { + const MachineInstr *MI = &*I; + // Find the corresponding slot index. + SlotIndex Idx; + if (MI->isPHI()) + // PHI defs begin at the basic block start index. + Idx = Indexes->getMBBStartIdx(MI->getParent()); + else + // Instructions are either normal 'r', or early clobber 'e'. + Idx = Indexes->getInstructionIndex(MI) + .getRegSlot(I.getOperand().isEarlyClobber()); + + // Create the def in LI. This may find an existing def. + LI->createDeadDef(Idx, *Alloc); + } +} + + +void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { + assert(MRI && Indexes && "call reset() first"); + + // Visit all operands that read Reg. This may include partial defs. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), + E = MRI->reg_nodbg_end(); I != E; ++I) { + MachineOperand &MO = I.getOperand(); + // Clear all kill flags. They will be reinserted after register allocation + // by LiveIntervalAnalysis::addKillFlags(). + if (MO.isUse()) + MO.setIsKill(false); + if (!MO.readsReg()) + continue; + // MI is reading Reg. 
We may have visited MI before if it happens to be + // reading Reg multiple times. That is OK, extend() is idempotent. + const MachineInstr *MI = &*I; + + // Find the SlotIndex being read. + SlotIndex Idx; + if (MI->isPHI()) { + assert(!MO.isDef() && "Cannot handle PHI def of partial register."); + // PHI operands are paired: (Reg, PredMBB). + // Extend the live range to be live-out from PredMBB. + Idx = Indexes->getMBBEndIdx(MI->getOperand(I.getOperandNo()+1).getMBB()); + } else { + // This is a normal instruction. + Idx = Indexes->getInstructionIndex(MI).getRegSlot(); + // Check for early-clobber redefs. + unsigned DefIdx; + if (MO.isDef()) { + if (MO.isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } else if (MI->isRegTiedToDefOperand(I.getOperandNo(), &DefIdx)) { + // FIXME: This would be a lot easier if tied early-clobber uses also + // had an early-clobber flag. + if (MI->getOperand(DefIdx).isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } + } + extend(LI, Idx, Reg); + } +} + + +// Transfer information from the LiveIn vector to the live ranges. +void LiveRangeCalc::updateLiveIns() { + LiveRangeUpdater Updater; + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(), + E = LiveIn.end(); I != E; ++I) { + if (!I->DomNode) + continue; + MachineBasicBlock *MBB = I->DomNode->getBlock(); + assert(I->Value && "No live-in value found"); + SlotIndex Start, End; + tie(Start, End) = Indexes->getMBBRange(MBB); + + if (I->Kill.isValid()) + // Value is killed inside this block. + End = I->Kill; + else { + // The value is live-through, update LiveOut as well. + // Defer the Domtree lookup until it is needed. + assert(Seen.test(MBB->getNumber())); + LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0); + } + Updater.setDest(I->LI); + Updater.add(Start, End, I->Value); + } + LiveIn.clear(); +} + + +void LiveRangeCalc::extend(LiveInterval *LI, + SlotIndex Kill, + unsigned PhysReg) { + assert(LI && "Missing live range"); + assert(Kill.isValid() && "Invalid SlotIndex"); + assert(Indexes && "Missing SlotIndexes"); + assert(DomTree && "Missing dominator tree"); + + MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot()); + assert(KillMBB && "No MBB at Kill"); + + // Is there a def in the same MBB we can extend? + if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) + return; + + // Find the single reaching def, or determine if Kill is jointly dominated by + // multiple values, and we may need to create even more phi-defs to preserve + // VNInfo SSA form. Perform a search for all predecessor blocks where we + // know the dominating VNInfo. + if (findReachingDefs(LI, KillMBB, Kill, PhysReg)) + return; + + // When there were multiple different values, we may need new PHIs. + calculateValues(); +} + + +// This function is called by a client after using the low-level API to add +// live-out and live-in blocks. The unique value optimization is not +// available, SplitEditor::transferValues handles that case directly anyway. +void LiveRangeCalc::calculateValues() { + assert(Indexes && "Missing SlotIndexes"); + assert(DomTree && "Missing dominator tree"); + updateSSA(); + updateLiveIns(); +} + + +bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill, + unsigned PhysReg) { + unsigned KillMBBNum = KillMBB->getNumber(); + + // Block numbers where LI should be live-in. + SmallVector<unsigned, 16> WorkList(1, KillMBBNum); + + // Remember if we have seen more than one value. 
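+  // UniqueVNI stays true while every live-out predecessor agrees on TheVNI;
+  // seeing a second distinct value forces the caller onto the full
+  // SSA-update path.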
+  bool UniqueVNI = true;
+  VNInfo *TheVNI = 0;
+
+  // Using Seen as a visited set, perform a BFS for all reaching defs.
+  for (unsigned i = 0; i != WorkList.size(); ++i) {
+    MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]);
+
+#ifndef NDEBUG
+    if (MBB->pred_empty()) {
+      MBB->getParent()->verify();
+      llvm_unreachable("Use not jointly dominated by defs.");
+    }
+
+    if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+        !MBB->isLiveIn(PhysReg)) {
+      MBB->getParent()->verify();
+      errs() << "The register needs to be live in to BB#" << MBB->getNumber()
+             << ", but is missing from the live-in list.\n";
+      llvm_unreachable("Invalid global physical register");
+    }
+#endif
+
+    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+      MachineBasicBlock *Pred = *PI;
+
+      // Is this a known live-out block?
+      if (Seen.test(Pred->getNumber())) {
+        if (VNInfo *VNI = LiveOut[Pred].first) {
+          if (TheVNI && TheVNI != VNI)
+            UniqueVNI = false;
+          TheVNI = VNI;
+        }
+        continue;
+      }
+
+      SlotIndex Start, End;
+      tie(Start, End) = Indexes->getMBBRange(Pred);
+
+      // First time we see Pred. Try to determine the live-out value, but set
+      // it as null if Pred is live-through with an unknown value.
+      VNInfo *VNI = LI->extendInBlock(Start, End);
+      setLiveOutValue(Pred, VNI);
+      if (VNI) {
+        if (TheVNI && TheVNI != VNI)
+          UniqueVNI = false;
+        TheVNI = VNI;
+        continue;
+      }
+
+      // No, we need a live-in value for Pred as well.
+      if (Pred != KillMBB)
+        WorkList.push_back(Pred->getNumber());
+      else
+        // Loopback to KillMBB, so value is really live through.
+        Kill = SlotIndex();
+    }
+  }
+
+  LiveIn.clear();
+
+  // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
+  // neither requires it. Skip the sorting overhead for small updates.
+  if (WorkList.size() > 4)
+    array_pod_sort(WorkList.begin(), WorkList.end());
+
+  // If a unique reaching def was found, blit in the live ranges immediately.
+  if (UniqueVNI) {
+    LiveRangeUpdater Updater(LI);
+    for (SmallVectorImpl<unsigned>::const_iterator
+         I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+      SlotIndex Start, End;
+      tie(Start, End) = Indexes->getMBBRange(*I);
+      // Trim the live range in KillMBB.
+      if (*I == KillMBBNum && Kill.isValid())
+        End = Kill;
+      else
+        LiveOut[MF->getBlockNumbered(*I)] =
+          LiveOutPair(TheVNI, (MachineDomTreeNode *)0);
+      Updater.add(Start, End, TheVNI);
+    }
+    return true;
+  }
+
+  // Multiple values were found, so transfer the work list to the LiveIn array
+  // where updateSSA() will use it as a work list.
+  LiveIn.reserve(WorkList.size());
+  for (SmallVectorImpl<unsigned>::const_iterator
+       I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
+    addLiveInBlock(LI, DomTree->getNode(MBB));
+    if (MBB == KillMBB)
+      LiveIn.back().Kill = Kill;
+  }
+
+  return false;
+}
+
+
+// This is essentially the same iterative algorithm that SSAUpdater uses,
+// except we already have a dominator tree, so we don't have to recompute it.
+void LiveRangeCalc::updateSSA() {
+  assert(Indexes && "Missing SlotIndexes");
+  assert(DomTree && "Missing dominator tree");
+
+  // Iterate until convergence.
+  unsigned Changes;
+  do {
+    Changes = 0;
+    // Propagate live-out values down the dominator tree, inserting phi-defs
+    // when necessary.
+    for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
+           E = LiveIn.end(); I != E; ++I) {
+      MachineDomTreeNode *Node = I->DomNode;
+      // Skip block if the live-in value has already been determined.
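+      // (A null DomNode marks a block whose final value has already been
+      // determined by the phi-def case below.)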
+      if (!Node)
+        continue;
+      MachineBasicBlock *MBB = Node->getBlock();
+      MachineDomTreeNode *IDom = Node->getIDom();
+      LiveOutPair IDomValue;
+
+      // We need a live-in value to a block with no immediate dominator?
+      // This is probably an unreachable block that has survived somehow.
+      bool needPHI = !IDom || !Seen.test(IDom->getBlock()->getNumber());
+
+      // IDom dominates all of our predecessors, but it may not be their
+      // immediate dominator. Check if any of them have live-out values that are
+      // properly dominated by IDom. If so, we need a phi-def here.
+      if (!needPHI) {
+        IDomValue = LiveOut[IDom->getBlock()];
+
+        // Cache the DomTree node that defined the value.
+        if (IDomValue.first && !IDomValue.second)
+          LiveOut[IDom->getBlock()].second = IDomValue.second =
+            DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+
+        for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+               PE = MBB->pred_end(); PI != PE; ++PI) {
+          LiveOutPair &Value = LiveOut[*PI];
+          if (!Value.first || Value.first == IDomValue.first)
+            continue;
+
+          // Cache the DomTree node that defined the value.
+          if (!Value.second)
+            Value.second =
+              DomTree->getNode(Indexes->getMBBFromIndex(Value.first->def));
+
+          // This predecessor is carrying something other than IDomValue.
+          // It could be because IDomValue hasn't propagated yet, or it could be
+          // because MBB is in the dominance frontier of that value.
+          if (DomTree->dominates(IDom, Value.second)) {
+            needPHI = true;
+            break;
+          }
+        }
+      }
+
+      // The value may be live-through even if Kill is set, as can happen when
+      // extend() takes this path. In that case the Seen bit is set for MBB,
+      // and LiveOut holds a foreign or missing value.
+      LiveOutPair &LOP = LiveOut[MBB];
+
+      // Create a phi-def if required.
+      if (needPHI) {
+        ++Changes;
+        assert(Alloc && "Need VNInfo allocator to create PHI-defs");
+        SlotIndex Start, End;
+        tie(Start, End) = Indexes->getMBBRange(MBB);
+        VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
+        I->Value = VNI;
+        // This block is done, we know the final value.
+        I->DomNode = 0;
+
+        // Add liveness since updateLiveIns now skips this node.
+        if (I->Kill.isValid())
+          I->LI->addRange(LiveRange(Start, I->Kill, VNI));
+        else {
+          I->LI->addRange(LiveRange(Start, End, VNI));
+          LOP = LiveOutPair(VNI, Node);
+        }
+      } else if (IDomValue.first) {
+        // No phi-def here. Remember incoming value.
+        I->Value = IDomValue.first;
+
+        // If the IDomValue is killed in the block, don't propagate through.
+        if (I->Kill.isValid())
+          continue;
+
+        // Propagate IDomValue if it isn't killed:
+        // MBB is live-out and doesn't define its own value.
+        if (LOP.first == IDomValue.first)
+          continue;
+        ++Changes;
+        LOP = IDomValue;
+      }
+    }
+  } while (Changes);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
new file mode 100644
index 0000000..57cab7b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -0,0 +1,242 @@
+//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeCalc class can be used to compute live ranges from scratch. It
+// caches information about values in the CFG to speed up repeated operations
+// on the same live range. The cache can be shared by non-overlapping live
+// ranges.
 SplitKit uses that when computing the live range of split products.
+//
+// A low-level interface is available to clients that know where a variable is
+// live, but don't know which value it has at every point. LiveRangeCalc will
+// propagate values down the dominator tree, and even insert PHI-defs where
+// needed. SplitKit uses this faster interface when possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGECALC_H
+#define LLVM_CODEGEN_LIVERANGECALC_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+/// Forward declarations for MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
+class LiveRangeCalc {
+  const MachineFunction *MF;
+  const MachineRegisterInfo *MRI;
+  SlotIndexes *Indexes;
+  MachineDominatorTree *DomTree;
+  VNInfo::Allocator *Alloc;
+
+  /// Seen - Bit vector of active entries in LiveOut, also used as a visited
+  /// set by findReachingDefs. One entry per basic block, indexed by block
+  /// number. This is kept as a separate bit vector because it can be cleared
+  /// quickly when switching live ranges.
+  BitVector Seen;
+
+  /// LiveOutPair - A value and the block that defined it. The domtree node is
+  /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
+  typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+
+  /// LiveOutMap - Map basic blocks to the value leaving the block.
+  typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+  /// LiveOut - Map each basic block where a live range is live out to the
+  /// live-out value and its defining block.
+  ///
+  /// For every basic block, MBB, one of these conditions shall be true:
+  ///
+  ///  1. !Seen.count(MBB->getNumber())
+  ///     Blocks without a Seen bit are ignored.
+  ///  2. LiveOut[MBB].second.getNode() == MBB
+  ///     The live-out value is defined in MBB.
+  ///  3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB]
+  ///     The live-out value passes through MBB. All predecessors must carry
+  ///     the same value.
+  ///
+  /// The domtree node may be null; it can be recomputed when needed.
+  ///
+  /// The map can be shared by multiple live ranges as long as no two are
+  /// live-out of the same block.
+  LiveOutMap LiveOut;
+
+  /// LiveInBlock - Information about a basic block where a live range is known
+  /// to be live-in, but the value has not yet been determined.
+  struct LiveInBlock {
+    // LI - The live range that is live-in to this block. The algorithms can
+    // handle multiple non-overlapping live ranges simultaneously.
+    LiveInterval *LI;
+
+    // DomNode - Dominator tree node for the block.
+    // Cleared when the final value has been determined and LI has been updated.
+    MachineDomTreeNode *DomNode;
+
+    // Position in block where the live-in range ends, or SlotIndex() if the
+    // range passes through the block. When the final value has been
+    // determined, the range from the block start to Kill will be added to LI.
+    SlotIndex Kill;
+
+    // Live-in value filled in by updateSSA once it is known.
+    VNInfo *Value;
+
+    LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill)
+      : LI(li), DomNode(node), Kill(kill), Value(0) {}
+  };
+
+  /// LiveIn - Work list of blocks where the live-in value has yet to be
+  /// determined. This list is typically computed by findReachingDefs() and
+  /// used as a work list by updateSSA().
The low-level interface may also be + /// used to add entries directly. + SmallVector<LiveInBlock, 16> LiveIn; + + /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set + /// of defs that can reach it. + /// + /// If only one def can reach Kill, all paths from the def to kill are added + /// to LI, and the function returns true. + /// + /// If multiple values can reach Kill, the blocks that need LI to be live in + /// are added to the LiveIn array, and the function returns false. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + bool findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill, + unsigned PhysReg); + + /// updateSSA - Compute the values that will be live in to all requested + /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. + /// + /// Every live-in block must be jointly dominated by the added live-out + /// blocks. No values are read from the live ranges. + void updateSSA(); + + /// Add liveness as specified in the LiveIn vector. + void updateLiveIns(); + +public: + LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {} + + //===--------------------------------------------------------------------===// + // High-level interface. + //===--------------------------------------------------------------------===// + // + // Calculate live ranges from scratch. + // + + /// reset - Prepare caches for a new set of non-overlapping live ranges. The + /// caches must be reset before attempting calculations with a live range + /// that may overlap a previously computed live range, and before the first + /// live range in a function. If live ranges are not known to be + /// non-overlapping, call reset before each. + void reset(const MachineFunction *MF, + SlotIndexes*, + MachineDominatorTree*, + VNInfo::Allocator*); + + /// calculate - Calculate the live range of a virtual register from its defs + /// and uses. LI must be empty with no values. + void calculate(LiveInterval *LI); + + //===--------------------------------------------------------------------===// + // Mid-level interface. + //===--------------------------------------------------------------------===// + // + // Modify existing live ranges. + // + + /// extend - Extend the live range of LI to reach Kill. + /// + /// The existing values in LI must be live so they jointly dominate Kill. If + /// Kill is not dominated by a single existing value, PHI-defs are inserted + /// as required to preserve SSA form. If Kill is known to be dominated by a + /// single existing value, Alloc may be null. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0); + + /// createDeadDefs - Create a dead def in LI for every def operand of Reg. + /// Each instruction defining Reg gets a new VNInfo with a corresponding + /// minimal live range. + void createDeadDefs(LiveInterval *LI, unsigned Reg); + + /// createDeadDefs - Create a dead def in LI for every def of LI->reg. + void createDeadDefs(LiveInterval *LI) { + createDeadDefs(LI, LI->reg); + } + + /// extendToUses - Extend the live range of LI to reach all uses of Reg. + /// + /// All uses must be jointly dominated by existing liveness. PHI-defs are + /// inserted as needed to preserve SSA form. + void extendToUses(LiveInterval *LI, unsigned Reg); + + /// extendToUses - Extend the live range of LI to reach all uses of LI->reg. 
+  void extendToUses(LiveInterval *LI) {
+    extendToUses(LI, LI->reg);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Low-level interface.
+  //===--------------------------------------------------------------------===//
+  //
+  // These functions can be used to compute live ranges where the live-in and
+  // live-out blocks are already known, but the SSA value in each block is
+  // unknown.
+  //
+  // After calling reset(), add known live-out values and known live-in blocks.
+  // Then call calculateValues() to compute the actual value that is
+  // live-in to each block, and add liveness to the live ranges.
+  //
+
+  /// setLiveOutValue - Indicate that VNI is live out from MBB. The
+  /// calculateValues() function will not add liveness for MBB, the caller
+  /// should take care of that.
+  ///
+  /// VNI may be null only if MBB is a live-through block also passed to
+  /// addLiveInBlock().
+  void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
+    Seen.set(MBB->getNumber());
+    LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0);
+  }
+
+  /// addLiveInBlock - Add a block with an unknown live-in value. This
+  /// function can only be called once per basic block. Once the live-in value
+  /// has been determined, calculateValues() will add liveness to LI.
+  ///
+  /// @param LI      The live range that is live-in to the block.
+  /// @param DomNode The domtree node for the block.
+  /// @param Kill    Index in block where LI is killed. If the value is
+  ///                live-through, set Kill = SlotIndex() and also call
+  ///                setLiveOutValue(MBB, 0).
+  void addLiveInBlock(LiveInterval *LI,
+                      MachineDomTreeNode *DomNode,
+                      SlotIndex Kill = SlotIndex()) {
+    LiveIn.push_back(LiveInBlock(LI, DomNode, Kill));
+  }
+
+  /// calculateValues - Calculate the value that will be live-in to each block
+  /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA
+  /// form. Add liveness to all live-in blocks up to the Kill point, or the
+  /// whole block for live-through blocks.
+  ///
+  /// Every predecessor of a live-in block must have been given a value with
+  /// setLiveOutValue, the value may be null for live-through blocks.
+  void calculateValues();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 0000000..7793e96
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,387 @@
+//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
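+// New virtual registers created for the edit are collected in NewRegs so
+// clients can iterate over the edit's products.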
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumDCEDeleted,     "Number of instructions deleted by DCE");
+STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
+STATISTIC(NumFracRanges,     "Number of live ranges fractured by DCE");
+
+void LiveRangeEdit::Delegate::anchor() { }
+
+LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) {
+  unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+  if (VRM) {
+    VRM->grow();
+    VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+  }
+  LiveInterval &LI = LIS.getOrCreateInterval(VReg);
+  NewRegs.push_back(&LI);
+  return LI;
+}
+
+bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
+                                          const MachineInstr *DefMI,
+                                          AliasAnalysis *aa) {
+  assert(DefMI && "Missing instruction");
+  ScannedRemattable = true;
+  if (!TII.isTriviallyReMaterializable(DefMI, aa))
+    return false;
+  Remattable.insert(VNI);
+  return true;
+}
+
+void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
+  for (LiveInterval::vni_iterator I = getParent().vni_begin(),
+       E = getParent().vni_end(); I != E; ++I) {
+    VNInfo *VNI = *I;
+    if (VNI->isUnused())
+      continue;
+    MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
+    if (!DefMI)
+      continue;
+    checkRematerializable(VNI, DefMI, aa);
+  }
+  ScannedRemattable = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
+  if (!ScannedRemattable)
+    scanRemattable(aa);
+  return !Remattable.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+                                       SlotIndex OrigIdx,
+                                       SlotIndex UseIdx) const {
+  OrigIdx = OrigIdx.getRegSlot(true);
+  UseIdx = UseIdx.getRegSlot(true);
+  for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = OrigMI->getOperand(i);
+    if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+      continue;
+
+    // We can't remat physreg uses, unless it is a constant.
+    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+      if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
+        continue;
+      return false;
+    }
+
+    LiveInterval &li = LIS.getInterval(MO.getReg());
+    const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+    if (!OVNI)
+      continue;
+
+    // Don't allow rematerialization immediately after the original def.
+    // It would be incorrect if OrigMI redefines the register.
+    // See PR14098.
+    if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
+      return false;
+
+    if (OVNI != li.getVNInfoAt(UseIdx))
+      return false;
+  }
+  return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+                                       SlotIndex UseIdx,
+                                       bool cheapAsAMove) {
+  assert(ScannedRemattable && "Call anyRematerializable first");
+
+  // Use scanRemattable info.
+  if (!Remattable.count(RM.ParentVNI))
+    return false;
+
+  // Locate the defining instruction: use the one provided in RM, or else
+  // look it up from ParentVNI's def index.
+ SlotIndex DefIdx; + if (RM.OrigMI) + DefIdx = LIS.getInstructionIndex(RM.OrigMI); + else { + DefIdx = RM.ParentVNI->def; + RM.OrigMI = LIS.getInstructionFromIndex(DefIdx); + assert(RM.OrigMI && "No defining instruction for remattable value"); + } + + // If only cheap remats were requested, bail out early. + if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove()) + return false; + + // Verify that all used registers are available with the same values. + if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx)) + return false; + + return true; +} + +SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, + const Remat &RM, + const TargetRegisterInfo &tri, + bool Late) { + assert(RM.OrigMI && "Invalid remat"); + TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); + Rematted.insert(RM.ParentVNI); + return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + .getRegSlot(); +} + +void LiveRangeEdit::eraseVirtReg(unsigned Reg) { + if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg)) + LIS.removeInterval(Reg); +} + +bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, + SmallVectorImpl<MachineInstr*> &Dead) { + MachineInstr *DefMI = 0, *UseMI = 0; + + // Check that there is a single def and a single use. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg), + E = MRI.reg_nodbg_end(); I != E; ++I) { + MachineOperand &MO = I.getOperand(); + MachineInstr *MI = MO.getParent(); + if (MO.isDef()) { + if (DefMI && DefMI != MI) + return false; + if (!MI->canFoldAsLoad()) + return false; + DefMI = MI; + } else if (!MO.isUndef()) { + if (UseMI && UseMI != MI) + return false; + // FIXME: Targets don't know how to fold subreg uses. + if (MO.getSubReg()) + return false; + UseMI = MI; + } + } + if (!DefMI || !UseMI) + return false; + + // Since we're moving the DefMI load, make sure we're not extending any live + // ranges. + if (!allUsesAvailableAt(DefMI, + LIS.getInstructionIndex(DefMI), + LIS.getInstructionIndex(UseMI))) + return false; + + // We also need to make sure it is safe to move the load. + // Assume there are stores between DefMI and UseMI. + bool SawStore = true; + if (!DefMI->isSafeToMove(&TII, 0, SawStore)) + return false; + + DEBUG(dbgs() << "Try to fold single def: " << *DefMI + << " into single use: " << *UseMI); + + SmallVector<unsigned, 8> Ops; + if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) + return false; + + MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI); + if (!FoldMI) + return false; + DEBUG(dbgs() << " folded: " << *FoldMI); + LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI); + UseMI->eraseFromParent(); + DefMI->addRegisterDead(LI->reg, 0); + Dead.push_back(DefMI); + ++NumDCEFoldedLoads; + return true; +} + +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, + ArrayRef<unsigned> RegsBeingSpilled) { + SetVector<LiveInterval*, + SmallVector<LiveInterval*, 8>, + SmallPtrSet<LiveInterval*, 8> > ToShrink; + + for (;;) { + // Erase all dead defs. + while (!Dead.empty()) { + MachineInstr *MI = Dead.pop_back_val(); + assert(MI->allDefsAreDead() && "Def isn't really dead"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + + // Never delete inline asm. + if (MI->isInlineAsm()) { + DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + continue; + } + + // Use the same criteria as DeadMachineInstructionElim. 
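+      // (With SawStore = false, isSafeToMove rejects stores, calls, and
+      // instructions with unmodeled side effects, but still allows a dead
+      // load to be deleted.)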
+      bool SawStore = false;
+      if (!MI->isSafeToMove(&TII, 0, SawStore)) {
+        DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
+        continue;
+      }
+
+      DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+
+      // Collect virtual registers to be erased after MI is gone.
+      SmallVector<unsigned, 8> RegsToErase;
+      bool ReadsPhysRegs = false;
+
+      // Check for live intervals that may shrink
+      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+             MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+        if (!MOI->isReg())
+          continue;
+        unsigned Reg = MOI->getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+          // Check if MI reads any unreserved physregs.
+          if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
+            ReadsPhysRegs = true;
+          continue;
+        }
+        LiveInterval &LI = LIS.getInterval(Reg);
+
+        // Shrink read registers, unless it is likely to be expensive and
+        // unlikely to change anything. We typically don't want to shrink the
+        // PIC base register that has lots of uses everywhere.
+        // Always shrink COPY uses that probably come from live range splitting.
+        if (MI->readsVirtualRegister(Reg) &&
+            (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) ||
+             LI.killedAt(Idx)))
+          ToShrink.insert(&LI);
+
+        // Remove defined value.
+        if (MOI->isDef()) {
+          if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
+            if (TheDelegate)
+              TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
+            LI.removeValNo(VNI);
+            if (LI.empty())
+              RegsToErase.push_back(Reg);
+          }
+        }
+      }
+
+      // Currently, we don't support DCE of physreg live ranges. If MI reads
+      // any unreserved physregs, don't erase the instruction, but turn it into
+      // a KILL instead. This way, the physreg live ranges don't end up
+      // dangling.
+      // FIXME: It would be better to have something like shrinkToUses() for
+      // physregs. That could potentially enable more DCE and it would free up
+      // the physreg. It would not happen often, though.
+      if (ReadsPhysRegs) {
+        MI->setDesc(TII.get(TargetOpcode::KILL));
+        // Remove all operands that aren't physregs.
+        for (unsigned i = MI->getNumOperands(); i; --i) {
+          const MachineOperand &MO = MI->getOperand(i-1);
+          if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+            continue;
+          MI->RemoveOperand(i-1);
+        }
+        DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
+      } else {
+        if (TheDelegate)
+          TheDelegate->LRE_WillEraseInstruction(MI);
+        LIS.RemoveMachineInstrFromMaps(MI);
+        MI->eraseFromParent();
+        ++NumDCEDeleted;
+      }
+
+      // Erase any virtregs that are now empty and unused. There may be <undef>
+      // uses around. Keep the empty live range in that case.
+      for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) {
+        unsigned Reg = RegsToErase[i];
+        if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) {
+          ToShrink.remove(&LIS.getInterval(Reg));
+          eraseVirtReg(Reg);
+        }
+      }
+    }
+
+    if (ToShrink.empty())
+      break;
+
+    // Shrink just one live interval. Then delete new dead defs.
+    LiveInterval *LI = ToShrink.back();
+    ToShrink.pop_back();
+    if (foldAsLoad(LI, Dead))
+      continue;
+    if (TheDelegate)
+      TheDelegate->LRE_WillShrinkVirtReg(LI->reg);
+    if (!LIS.shrinkToUses(LI, &Dead))
+      continue;
+
+    // Don't create new intervals for a register being spilled.
+    // The new intervals would have to be spilled anyway so it's not worth it.
+    // Also they currently aren't spilled so creating them and not spilling
+    // them results in incorrect code.
+ bool BeingSpilled = false; + for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { + if (LI->reg == RegsBeingSpilled[i]) { + BeingSpilled = true; + break; + } + } + + if (BeingSpilled) continue; + + // LI may have been separated, create new intervals. + LI->RenumberValues(LIS); + ConnectedVNInfoEqClasses ConEQ(LIS); + unsigned NumComp = ConEQ.Classify(LI); + if (NumComp <= 1) + continue; + ++NumFracRanges; + bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg; + DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); + SmallVector<LiveInterval*, 8> Dups(1, LI); + for (unsigned i = 1; i != NumComp; ++i) { + Dups.push_back(&createFrom(LI->reg)); + // If LI is an original interval that hasn't been split yet, make the new + // intervals their own originals instead of referring to LI. The original + // interval must contain all the split products, and LI doesn't. + if (IsOriginal) + VRM->setIsSplitFromReg(Dups.back()->reg, 0); + if (TheDelegate) + TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); + } + ConEQ.Distribute(&Dups[0], MRI); + DEBUG({ + for (unsigned i = 0; i != NumComp; ++i) + dbgs() << '\t' << *Dups[i] << '\n'; + }); + } +} + +void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + const MachineLoopInfo &Loops) { + VirtRegAuxInfo VRAI(MF, LIS, Loops); + for (iterator I = begin(), E = end(); I != E; ++I) { + LiveInterval &LI = **I; + if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) + DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to " + << MRI.getRegClass(LI.reg)->getName() << '\n'); + VRAI.CalculateWeightAndHint(LI); + } +} diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp new file mode 100644 index 0000000..0ef069f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -0,0 +1,154 @@ +//===-- LiveRegMatrix.cpp - Track register interference -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the LiveRegMatrix analysis pass. 
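+// The matrix keeps one LiveIntervalUnion per register unit; assigning a
+// virtual register merges its live interval into the unions for all units of
+// the physical register, and interference checks query those unions.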
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "RegisterCoalescer.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +STATISTIC(NumAssigned , "Number of registers assigned"); +STATISTIC(NumUnassigned , "Number of registers unassigned"); + +char LiveRegMatrix::ID = 0; +INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix", + "Live Register Matrix", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix", + "Live Register Matrix", false, false) + +LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID), + UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {} + +void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<LiveIntervals>(); + AU.addRequiredTransitive<VirtRegMap>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + LIS = &getAnalysis<LiveIntervals>(); + VRM = &getAnalysis<VirtRegMap>(); + + unsigned NumRegUnits = TRI->getNumRegUnits(); + if (NumRegUnits != Matrix.size()) + Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]); + Matrix.init(LIUAlloc, NumRegUnits); + + // Make sure no stale queries get reused. + invalidateVirtRegs(); + return false; +} + +void LiveRegMatrix::releaseMemory() { + for (unsigned i = 0, e = Matrix.size(); i != e; ++i) { + Matrix[i].clear(); + Queries[i].clear(); + } +} + +void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) + << " to " << PrintReg(PhysReg, TRI) << ':'); + assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI)); + Matrix[*Units].unify(VirtReg); + } + ++NumAssigned; + DEBUG(dbgs() << '\n'); +} + +void LiveRegMatrix::unassign(LiveInterval &VirtReg) { + unsigned PhysReg = VRM->getPhys(VirtReg.reg); + DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) + << " from " << PrintReg(PhysReg, TRI) << ':'); + VRM->clearVirt(VirtReg.reg); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI)); + Matrix[*Units].extract(VirtReg); + } + ++NumUnassigned; + DEBUG(dbgs() << '\n'); +} + +bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + // Check if the cached information is valid. + // The same BitVector can be reused for all PhysRegs. + // We could cache multiple VirtRegs if it becomes necessary. + if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) { + RegMaskVirtReg = VirtReg.reg; + RegMaskTag = UserTag; + RegMaskUsable.clear(); + LIS->checkRegMaskInterference(VirtReg, RegMaskUsable); + } + + // The BitVector is indexed by PhysReg, not register unit. + // Regmask interference is more fine grained than regunits. 
+ // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8. + return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg)); +} + +bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + if (VirtReg.empty()) + return false; + CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes())) + return true; + return false; +} + +LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg, + unsigned RegUnit) { + LiveIntervalUnion::Query &Q = Queries[RegUnit]; + Q.init(UserTag, &VirtReg, &Matrix[RegUnit]); + return Q; +} + +LiveRegMatrix::InterferenceKind +LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { + if (VirtReg.empty()) + return IK_Free; + + // Regmask interference is the fastest check. + if (checkRegMaskInterference(VirtReg, PhysReg)) + return IK_RegMask; + + // Check for fixed interference. + if (checkRegUnitInterference(VirtReg, PhysReg)) + return IK_RegUnit; + + // Check the matrix for virtual register interference. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (query(VirtReg, *Units).checkInterference()) + return IK_VirtReg; + + return IK_Free; +} diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp new file mode 100644 index 0000000..be11a8f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp @@ -0,0 +1,86 @@ +//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the live stack slot analysis pass. It is analogous to +// live interval analysis except it's analyzing liveness of stack slots rather +// than registers. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "livestacks" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <limits> +using namespace llvm; + +char LiveStacks::ID = 0; +INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks", + "Live Stack Slot Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(LiveStacks, "livestacks", + "Live Stack Slot Analysis", false, false) + +char &llvm::LiveStacksID = LiveStacks::ID; + +void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addPreserved<SlotIndexes>(); + AU.addRequiredTransitive<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void LiveStacks::releaseMemory() { + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. + VNInfoAllocator.Reset(); + S2IMap.clear(); + S2RCMap.clear(); +} + +bool LiveStacks::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getTarget().getRegisterInfo(); + // FIXME: No analysis is being done right now. We are relying on the + // register allocators to provide the information. 
+  return false;
+}
+
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
+  assert(Slot >= 0 && "Spill slot index must be >= 0");
+  SS2IntervalMap::iterator I = S2IMap.find(Slot);
+  if (I == S2IMap.end()) {
+    I = S2IMap.insert(I, std::make_pair(Slot,
+        LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+    S2RCMap.insert(std::make_pair(Slot, RC));
+  } else {
+    // Use the largest common subclass register class.
+    const TargetRegisterClass *OldRC = S2RCMap[Slot];
+    S2RCMap[Slot] = TRI->getCommonSubClass(OldRC, RC);
+  }
+  return I->second;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(raw_ostream &OS, const Module*) const {
+
+  OS << "********** INTERVALS **********\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    I->second.print(OS);
+    int Slot = I->first;
+    const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+    if (RC)
+      OS << " [" << RC->getName() << "]\n";
+    else
+      OS << " [Unknown]\n";
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 0000000..789eddc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,826 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
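+//
+// A minimal sketch of how a later pass might consume this analysis (the
+// virtual register VirtReg and the block MBB here are hypothetical):
+//
+//   LiveVariables &LV = getAnalysis<LiveVariables>();
+//   LiveVariables::VarInfo &VI = LV.getVarInfo(VirtReg);
+//   if (VI.AliveBlocks.test(MBB->getNumber()) || VI.findKill(MBB))
+//     ; // VirtReg is live within MBB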
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +using namespace llvm; + +char LiveVariables::ID = 0; +char &llvm::LiveVariablesID = LiveVariables::ID; +INITIALIZE_PASS_BEGIN(LiveVariables, "livevars", + "Live Variable Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim) +INITIALIZE_PASS_END(LiveVariables, "livevars", + "Live Variable Analysis", false, false) + + +void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(UnreachableMachineBlockElimID); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineInstr * +LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + if (Kills[i]->getParent() == MBB) + return Kills[i]; + return NULL; +} + +void LiveVariables::VarInfo::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dbgs() << " Alive in blocks: "; + for (SparseBitVector<>::iterator I = AliveBlocks.begin(), + E = AliveBlocks.end(); I != E; ++I) + dbgs() << *I << ", "; + dbgs() << "\n Killed by:"; + if (Kills.empty()) + dbgs() << " No instructions.\n"; + else { + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + dbgs() << "\n #" << i << ": " << *Kills[i]; + dbgs() << "\n"; + } +#endif +} + +/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. +LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) { + assert(TargetRegisterInfo::isVirtualRegister(RegIdx) && + "getVarInfo: not a virtual register!"); + VirtRegInfo.grow(RegIdx); + return VirtRegInfo[RegIdx]; +} + +void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, + MachineBasicBlock *DefBlock, + MachineBasicBlock *MBB, + std::vector<MachineBasicBlock*> &WorkList) { + unsigned BBNum = MBB->getNumber(); + + // Check to see if this basic block is one of the killing blocks. If so, + // remove it. 
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    if (VRInfo.Kills[i]->getParent() == MBB) {
+      VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+      break;
+    }
+
+  if (MBB == DefBlock) return;  // Terminate recursion
+
+  if (VRInfo.AliveBlocks.test(BBNum))
+    return;  // We already know the block is live
+
+  // Mark the variable known alive in this bb
+  VRInfo.AliveBlocks.set(BBNum);
+
+  assert(MBB != &MF->front() && "Can't find reaching def for virtreg");
+  WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+                                            MachineBasicBlock *DefBlock,
+                                            MachineBasicBlock *MBB) {
+  std::vector<MachineBasicBlock*> WorkList;
+  MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+  while (!WorkList.empty()) {
+    MachineBasicBlock *Pred = WorkList.back();
+    WorkList.pop_back();
+    MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+  }
+}
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+                                     MachineInstr *MI) {
+  assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+  unsigned BBNum = MBB->getNumber();
+
+  VarInfo& VRInfo = getVarInfo(reg);
+
+  // Check to see if this basic block is already a kill block.
+  if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+    // Yes, this register is killed in this basic block already. Increase the
+    // live range by updating the kill instruction.
+    VRInfo.Kills.back() = MI;
+    return;
+  }
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+  // This situation can occur:
+  //
+  //     ,------.
+  //     |      |
+  //     |      v
+  //     |   t2 = phi ... t1 ...
+  //     |      |
+  //     |      v
+  //     |   t1 = ...
+  //     |  ... = ... t1 ...
+  //     |      |
+  //     `------'
+  //
+  // where there is a use in a PHI node that's a predecessor to the defining
+  // block. We don't want to mark all predecessors as having the value "alive"
+  // in this case.
+  if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+  // Add a new kill entry for this basic block. If this virtual register is
+  // already marked as alive in this basic block, that means it is alive in at
+  // least one of the successor blocks, so it's not a kill.
+  if (!VRInfo.AliveBlocks.test(BBNum))
+    VRInfo.Kills.push_back(MI);
+
+  // Update all dominating blocks to mark them as "known live".
+  for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+         E = MBB->pred_end(); PI != E; ++PI)
+    MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+  VarInfo &VRInfo = getVarInfo(Reg);
+
+  if (VRInfo.AliveBlocks.empty())
+    // If vr is not alive in any block, then it defaults to dead.
+    VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-registers that are defined by the instruction.
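+///
+/// For example (illustrative only): if AX is defined and then AL is defined,
+/// the last partial def of EAX is the def of AL, and PartDefRegs will contain
+/// AL together with any other sub-registers of EAX defined by that same
+/// instruction.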
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, + SmallSet<unsigned,4> &PartDefRegs) { + unsigned LastDefReg = 0; + unsigned LastDefDist = 0; + MachineInstr *LastDef = NULL; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + MachineInstr *Def = PhysRegDef[SubReg]; + if (!Def) + continue; + unsigned Dist = DistanceMap[Def]; + if (Dist > LastDefDist) { + LastDefReg = SubReg; + LastDef = Def; + LastDefDist = Dist; + } + } + + if (!LastDef) + return 0; + + PartDefRegs.insert(LastDefReg); + for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) { + MachineOperand &MO = LastDef->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) + continue; + unsigned DefReg = MO.getReg(); + if (TRI->isSubRegister(Reg, DefReg)) { + PartDefRegs.insert(DefReg); + for (MCSubRegIterator SubRegs(DefReg, TRI); SubRegs.isValid(); ++SubRegs) + PartDefRegs.insert(*SubRegs); + } + } + return LastDef; +} + +/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add +/// implicit defs to a machine instruction if there was an earlier def of its +/// super-register. +void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { + MachineInstr *LastDef = PhysRegDef[Reg]; + // If there was a previous use or a "full" def all is well. + if (!LastDef && !PhysRegUse[Reg]) { + // Otherwise, the last sub-register def implicitly defines this register. + // e.g. + // AH = + // AL = ... <imp-def EAX>, <imp-kill AH> + // = AH + // ... + // = EAX + // All of the sub-registers must have been defined before the use of Reg! + SmallSet<unsigned, 4> PartDefRegs; + MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs); + // If LastPartialDef is NULL, it must be using a livein register. + if (LastPartialDef) { + LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, + true/*IsImp*/)); + PhysRegDef[Reg] = LastPartialDef; + SmallSet<unsigned, 8> Processed; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + if (Processed.count(SubReg)) + continue; + if (PartDefRegs.count(SubReg)) + continue; + // This part of Reg was defined before the last partial def. It's killed + // here. + LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg, + false/*IsDef*/, + true/*IsImp*/)); + PhysRegDef[SubReg] = LastPartialDef; + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + Processed.insert(*SS); + } + } + } else if (LastDef && !PhysRegUse[Reg] && + !LastDef->findRegisterDefOperand(Reg)) + // Last def defines the super register, add an implicit def of reg. + LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, + true/*IsImp*/)); + + // Remember this use. + PhysRegUse[Reg] = MI; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + PhysRegUse[*SubRegs] = MI; +} + +/// FindLastRefOrPartRef - Return the last reference or partial reference of +/// the specified register. +MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { + MachineInstr *LastDef = PhysRegDef[Reg]; + MachineInstr *LastUse = PhysRegUse[Reg]; + if (!LastDef && !LastUse) + return 0; + + MachineInstr *LastRefOrPartRef = LastUse ? 
LastUse : LastDef;
+  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+  unsigned LastPartDefDist = 0;
+  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+    unsigned SubReg = *SubRegs;
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (Def && Def != LastDef) {
+      // There was a def of this sub-register in between. This is a partial
+      // def, keep track of the last one.
+      unsigned Dist = DistanceMap[Def];
+      if (Dist > LastPartDefDist)
+        LastPartDefDist = Dist;
+    } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
+      unsigned Dist = DistanceMap[Use];
+      if (Dist > LastRefOrPartRefDist) {
+        LastRefOrPartRefDist = Dist;
+        LastRefOrPartRef = Use;
+      }
+    }
+  }
+
+  return LastRefOrPartRef;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+  MachineInstr *LastDef = PhysRegDef[Reg];
+  MachineInstr *LastUse = PhysRegUse[Reg];
+  if (!LastDef && !LastUse)
+    return false;
+
+  MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+  // The whole register is used.
+  // AL =
+  // AH =
+  //
+  //    = AX
+  //    = AL, AX<imp-use, kill>
+  // AX =
+  //
+  // Or whole register is defined, but not used at all.
+  // AX<dead> =
+  // ...
+  // AX =
+  //
+  // Or whole register is defined, but only partly used.
+  // AX<dead> = AL<imp-def>
+  //    = AL<kill>
+  // AX =
+  MachineInstr *LastPartDef = 0;
+  unsigned LastPartDefDist = 0;
+  SmallSet<unsigned, 8> PartUses;
+  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+    unsigned SubReg = *SubRegs;
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (Def && Def != LastDef) {
+      // There was a def of this sub-register in between. This is a partial
+      // def, keep track of the last one.
+      unsigned Dist = DistanceMap[Def];
+      if (Dist > LastPartDefDist) {
+        LastPartDefDist = Dist;
+        LastPartDef = Def;
+      }
+      continue;
+    }
+    if (MachineInstr *Use = PhysRegUse[SubReg]) {
+      PartUses.insert(SubReg);
+      for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+        PartUses.insert(*SS);
+      unsigned Dist = DistanceMap[Use];
+      if (Dist > LastRefOrPartRefDist) {
+        LastRefOrPartRefDist = Dist;
+        LastRefOrPartRef = Use;
+      }
+    }
+  }
+
+  if (!PhysRegUse[Reg]) {
+    // Partial uses. Mark register def dead and add implicit def of
+    // sub-registers which are used.
+    // EAX<dead> = op AL<imp-def>
+    // That is, EAX def is dead but AL def extends past it.
+    PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+    for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+      unsigned SubReg = *SubRegs;
+      if (!PartUses.count(SubReg))
+        continue;
+      bool NeedDef = true;
+      if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+        MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+        if (MO) {
+          NeedDef = false;
+          assert(!MO->isDead());
+        }
+      }
+      if (NeedDef)
+        PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+                                                true/*IsDef*/, true/*IsImp*/));
+      MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+      if (LastSubRef)
+        LastSubRef->addRegisterKilled(SubReg, TRI, true);
+      else {
+        LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+        PhysRegUse[SubReg] = LastRefOrPartRef;
+        for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+          PhysRegUse[*SS] = LastRefOrPartRef;
+      }
+      for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+        PartUses.erase(*SS);
+    }
+  } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+    if (LastPartDef)
+      // The last partial def kills the register.
+      LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+                                                true/*IsImp*/, true/*IsKill*/));
+    else {
+      MachineOperand *MO =
+        LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+      bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+      // If the last reference is the last def, then it's not used at all.
+      // That is, unless we are currently processing the last reference itself.
+      LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+      if (NeedEC) {
+        // If we are adding a subreg def and the superreg def is marked early
+        // clobber, add an early clobber marker to the subreg def.
+        MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+        if (MO)
+          MO->setIsEarlyClobber();
+      }
+    }
+  } else
+    LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+  return true;
+}
+
+void LiveVariables::HandleRegMask(const MachineOperand &MO) {
+  // Call HandlePhysRegKill() for all live registers clobbered by Mask.
+  // Clobbered registers are always dead, so there is no need to use
+  // HandlePhysRegDef().
+  for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) {
+    // Skip dead regs.
+    if (!PhysRegDef[Reg] && !PhysRegUse[Reg])
+      continue;
+    // Skip mask-preserved regs.
+    if (!MO.clobbersPhysReg(Reg))
+      continue;
+    // Kill the largest clobbered super-register.
+    // This avoids needless implicit operands.
+    unsigned Super = Reg;
+    for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+      if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
+        Super = *SR;
+    HandlePhysRegKill(Super, 0);
+  }
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+                                     SmallVector<unsigned, 4> &Defs) {
+  // Which parts of the register were previously defined?
+  SmallSet<unsigned, 32> Live;
+  if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+    Live.insert(Reg);
+    for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+      Live.insert(*SubRegs);
+  } else {
+    for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+      unsigned SubReg = *SubRegs;
+      // If a register isn't itself defined, but all of the parts that make it
+      // up are defined, then consider it also defined.
+      // e.g.
+      // AL =
+      // AH =
+      //    = AX
+      if (Live.count(SubReg))
+        continue;
+      if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+        Live.insert(SubReg);
+        for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+          Live.insert(*SS);
+      }
+    }
+  }
+
+  // Start from the largest piece, find the last time any part of the register
+  // is referenced.
+  HandlePhysRegKill(Reg, MI);
+  // Only some of the sub-registers are used.
+  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+    unsigned SubReg = *SubRegs;
+    if (!Live.count(SubReg))
+      // Skip if this sub-register isn't defined.
+      continue;
+    HandlePhysRegKill(SubReg, MI);
+  }
+
+  if (MI)
+    Defs.push_back(Reg); // Remember this def.
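+  // Worked example (illustrative): if only AL and AH have been referenced so
+  // far, a def of EAX computes Live = {AL, AH} (AX and EAX themselves are not
+  // live), so HandlePhysRegKill runs for EAX first and then for each live
+  // piece.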
+} + +void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, + SmallVector<unsigned, 4> &Defs) { + while (!Defs.empty()) { + unsigned Reg = Defs.back(); + Defs.pop_back(); + PhysRegDef[Reg] = MI; + PhysRegUse[Reg] = NULL; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + PhysRegDef[SubReg] = MI; + PhysRegUse[SubReg] = NULL; + } + } +} + +bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + MRI = &mf.getRegInfo(); + TRI = MF->getTarget().getRegisterInfo(); + + unsigned NumRegs = TRI->getNumRegs(); + PhysRegDef = new MachineInstr*[NumRegs]; + PhysRegUse = new MachineInstr*[NumRegs]; + PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; + std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); + std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); + PHIJoins.clear(); + + // FIXME: LiveIntervals will be updated to remove its dependence on + // LiveVariables to improve compilation time and eliminate bizarre pass + // dependencies. Until then, we can't change much in -O0. + if (!MRI->isSSA()) + report_fatal_error("regalloc=... not currently supported with -O0"); + + analyzePHINodes(mf); + + // Calculate live variable information in depth first order on the CFG of the + // function. This guarantees that we will see the definition of a virtual + // register before its uses due to dominance properties of SSA (except for PHI + // nodes, which are treated as a special case). + MachineBasicBlock *Entry = MF->begin(); + SmallPtrSet<MachineBasicBlock*,16> Visited; + + for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > + DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); + DFI != E; ++DFI) { + MachineBasicBlock *MBB = *DFI; + + // Mark live-in registers as live-in. + SmallVector<unsigned, 4> Defs; + for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), + EE = MBB->livein_end(); II != EE; ++II) { + assert(TargetRegisterInfo::isPhysicalRegister(*II) && + "Cannot have a live-in virtual register!"); + HandlePhysRegDef(*II, 0, Defs); + } + + // Loop over all of the instructions, processing them. + DistanceMap.clear(); + unsigned Dist = 0; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + DistanceMap.insert(std::make_pair(MI, Dist++)); + + // Process all of the operands of the instruction... + unsigned NumOperandsToProcess = MI->getNumOperands(); + + // Unless it is a PHI node. In this case, ONLY process the DEF, not any + // of the uses. They will be handled in other basic blocks. + if (MI->isPHI()) + NumOperandsToProcess = 1; + + // Clear kill and dead markers. LV will recompute them. + SmallVector<unsigned, 4> UseRegs; + SmallVector<unsigned, 4> DefRegs; + SmallVector<unsigned, 1> RegMasks; + for (unsigned i = 0; i != NumOperandsToProcess; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) { + RegMasks.push_back(i); + continue; + } + if (!MO.isReg() || MO.getReg() == 0) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + MO.setIsKill(false); + if (MO.readsReg()) + UseRegs.push_back(MOReg); + } else /*MO.isDef()*/ { + MO.setIsDead(false); + DefRegs.push_back(MOReg); + } + } + + // Process all uses. 
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { + unsigned MOReg = UseRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + HandleVirtRegUse(MOReg, MBB, MI); + else if (!MRI->isReserved(MOReg)) + HandlePhysRegUse(MOReg, MI); + } + + // Process all masked registers. (Call clobbers). + for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) + HandleRegMask(MI->getOperand(RegMasks[i])); + + // Process all defs. + for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { + unsigned MOReg = DefRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + HandleVirtRegDef(MOReg, MI); + else if (!MRI->isReserved(MOReg)) + HandlePhysRegDef(MOReg, MI, Defs); + } + UpdatePhysRegDefs(MI, Defs); + } + + // Handle any virtual assignments from PHI nodes which might be at the + // bottom of this basic block. We check all of our successor blocks to see + // if they have PHI nodes, and if so, we simulate an assignment at the end + // of the current block. + if (!PHIVarInfo[MBB->getNumber()].empty()) { + SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()]; + + for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(), + E = VarInfoVec.end(); I != E; ++I) + // Mark it alive only in the block we are representing. + MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), + MBB); + } + + // MachineCSE may CSE instructions which write to non-allocatable physical + // registers across MBBs. Remember if any reserved register is liveout. + SmallSet<unsigned, 4> LiveOuts; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + if (SuccMBB->isLandingPad()) + continue; + for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), + LE = SuccMBB->livein_end(); LI != LE; ++LI) { + unsigned LReg = *LI; + if (!TRI->isInAllocatableClass(LReg)) + // Ignore other live-ins, e.g. those that are live into landing pads. + LiveOuts.insert(LReg); + } + } + + // Loop over PhysRegDef / PhysRegUse, killing any registers that are + // available at the end of the basic block. + for (unsigned i = 0; i != NumRegs; ++i) + if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) + HandlePhysRegDef(i, 0, Defs); + + std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); + std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); + } + + // Convert and transfer the dead / killed information we have gathered into + // VirtRegInfo onto MI's. + for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) { + const unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j) + if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg)) + VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI); + else + VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI); + } + + // Check to make sure there are no unreachable blocks in the MC CFG for the + // function. If so, it is due to a bug in the instruction selector or some + // other part of the code generator if this happens. +#ifndef NDEBUG + for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i) + assert(Visited.count(&*i) != 0 && "unreachable basic block found"); +#endif + + delete[] PhysRegDef; + delete[] PhysRegUse; + delete[] PHIVarInfo; + + return false; +} + +/// replaceKillInstruction - Update register kill info by replacing a kill +/// instruction with a new one. 
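+///
+/// A typical caller (illustrative; e.g. a pass that rewrites OldMI into
+/// NewMI) keeps the kill lists consistent with:
+///   LV->replaceKillInstruction(Reg, OldMI, NewMI);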
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+                                           MachineInstr *NewMI) {
+  VarInfo &VI = getVarInfo(Reg);
+  std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isKill()) {
+      MO.setIsKill(false);
+      unsigned Reg = MO.getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        bool removed = getVarInfo(Reg).removeKill(MI);
+        assert(removed && "kill not in register's VarInfo?");
+        (void)removed;
+      }
+    }
+  }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in the function.
+/// In particular, we want to map the variable information of a virtual
+/// register which is used in a PHI node. We map that to the BB the vreg is
+/// coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+  for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI)
+      for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+        if (BBI->getOperand(i).readsReg())
+          PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+            .push_back(BBI->getOperand(i).getReg());
+}
+
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+                                      unsigned Reg,
+                                      MachineRegisterInfo &MRI) {
+  unsigned Num = MBB.getNumber();
+
+  // Reg is live-through.
+  if (AliveBlocks.test(Num))
+    return true;
+
+  // Registers defined in MBB cannot be live in.
+  const MachineInstr *Def = MRI.getVRegDef(Reg);
+  if (Def && Def->getParent() == &MBB)
+    return false;
+
+  // Reg was not defined in MBB; was it killed here?
+  return findKill(&MBB);
+}
+
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+  LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+  // Loop over all of the successors of the basic block, checking to see if
+  // the value is either live in the block, or if it is killed in the block.
+  SmallVector<MachineBasicBlock*, 8> OpSuccBlocks;
+  for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+         E = MBB.succ_end(); SI != E; ++SI) {
+    MachineBasicBlock *SuccMBB = *SI;
+
+    // Is it alive in this successor?
+    unsigned SuccIdx = SuccMBB->getNumber();
+    if (VI.AliveBlocks.test(SuccIdx))
+      return true;
+    OpSuccBlocks.push_back(SuccMBB);
+  }
+
+  // Check to see if this value is live because there is a use in a successor
+  // that kills it.
+  switch (OpSuccBlocks.size()) {
+  case 1: {
+    MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (VI.Kills[i]->getParent() == SuccMBB)
+        return true;
+    break;
+  }
+  case 2: {
+    MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (VI.Kills[i]->getParent() == SuccMBB1 ||
+          VI.Kills[i]->getParent() == SuccMBB2)
+        return true;
+    break;
+  }
+  default:
+    std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+                             VI.Kills[i]->getParent()))
+        return true;
+  }
+  return false;
+}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+                                MachineBasicBlock *DomBB,
+                                MachineBasicBlock *SuccBB) {
+  const unsigned NumNew = BB->getNumber();
+
+  SmallSet<unsigned, 16> Defs, Kills;
+
+  MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end();
+  for (; BBI != BBE && BBI->isPHI(); ++BBI) {
+    // Record the def of the PHI node.
+    Defs.insert(BBI->getOperand(0).getReg());
+
+    // All registers used by PHI nodes in SuccBB must be live through BB.
+    for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+      if (BBI->getOperand(i+1).getMBB() == BB)
+        getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+  }
+
+  // Record all vreg defs and kills of all instructions in SuccBB.
+  for (; BBI != BBE; ++BBI) {
+    for (MachineInstr::mop_iterator I = BBI->operands_begin(),
+         E = BBI->operands_end(); I != E; ++I) {
+      if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) {
+        if (I->isDef())
+          Defs.insert(I->getReg());
+        else if (I->isKill())
+          Kills.insert(I->getReg());
+      }
+    }
+  }
+
+  // Update info for all live variables
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+    // If the register is defined in SuccBB, it can't be live into BB.
+    if (Defs.count(Reg))
+      continue;
+
+    // If the register is either killed in or live through SuccBB, it's also
+    // live through BB.
+    VarInfo &VI = getVarInfo(Reg);
+    if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber()))
+      VI.AliveBlocks.set(NumNew);
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 0000000..352ef94
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,356 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
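+//
+// Conceptually (a simplified sketch, not the code below): when the target
+// reports that a frame-index reference may be out of range, the pass
+// materializes a virtual base register once,
+//   BaseReg = <frame base> + BaseOffset
+// and rewrites nearby references as BaseReg-relative accesses with a small
+// delta, re-using BaseReg for as many references as stay in range.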
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "localstackalloc" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +STATISTIC(NumAllocations, "Number of frame indices allocated into local block"); +STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated"); +STATISTIC(NumReplacements, "Number of frame indices references replaced"); + +namespace { + class FrameRef { + MachineBasicBlock::iterator MI; // Instr referencing the frame + int64_t LocalOffset; // Local offset of the frame idx referenced + public: + FrameRef(MachineBasicBlock::iterator I, int64_t Offset) : + MI(I), LocalOffset(Offset) {} + bool operator<(const FrameRef &RHS) const { + return LocalOffset < RHS.LocalOffset; + } + MachineBasicBlock::iterator getMachineInstr() { return MI; } + }; + + class LocalStackSlotPass: public MachineFunctionPass { + SmallVector<int64_t,16> LocalOffsets; + + void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset, + bool StackGrowsDown, unsigned &MaxAlign); + void calculateFrameObjectOffsets(MachineFunction &Fn); + bool insertFrameReferenceRegisters(MachineFunction &Fn); + public: + static char ID; // Pass identification, replacement for typeid + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { } + bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + }; +} // end anonymous namespace + +char LocalStackSlotPass::ID = 0; +char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; +INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) + +bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + unsigned LocalObjectCount = MFI->getObjectIndexEnd(); + + // If the target doesn't want/need this pass, or if there are no locals + // to consider, early exit. + if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0) + return true; + + // Make sure we have enough space to store the local offsets. + LocalOffsets.resize(MFI->getObjectIndexEnd()); + + // Lay out the local blob. + calculateFrameObjectOffsets(MF); + + // Insert virtual base registers to resolve frame index references. + bool UsedBaseRegs = insertFrameReferenceRegisters(MF); + + // Tell MFI whether any base registers were allocated. PEI will only + // want to use the local block allocations from this pass if there were any. 
+ // Otherwise, PEI can do a bit better job of getting the alignment right + // without a hole at the start since it knows the alignment of the stack + // at the start of local allocation, and this pass doesn't. + MFI->setUseLocalStackAllocationBlock(UsedBaseRegs); + + return true; +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, + int FrameIdx, int64_t &Offset, + bool StackGrowsDown, + unsigned &MaxAlign) { + // If the stack grows down, add the object size to find the lowest address. + if (StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + int64_t LocalOffset = StackGrowsDown ? -Offset : Offset; + DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " + << LocalOffset << "\n"); + // Keep the offset available for base register allocation + LocalOffsets[FrameIdx] = LocalOffset; + // And tell MFI about it for PEI to use later + MFI->mapLocalFrameObject(FrameIdx, LocalOffset); + + if (!StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + ++NumAllocations; +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. +/// +void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + int64_t Offset = 0; + unsigned MaxAlign = 0; + + // Make sure that the stack protector comes before the local variables on the + // stack. + SmallSet<int, 16> LargeStackObjs; + if (MFI->getStackProtectorIndex() >= 0) { + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, + StackGrowsDown, MaxAlign); + + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (!MFI->MayNeedStackProtector(i)) + continue; + + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + LargeStackObjs.insert(i); + } + } + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers. 
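+  // (Illustrative numbers for AdjustStackOffset above: with StackGrowsDown,
+  // a 4-byte object placed at Offset 5 with 4-byte alignment first grows
+  // Offset to 9, rounds it up to 12, and records LocalOffset -12.)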
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (MFI->isDeadObjectIndex(i))
+      continue;
+    if (MFI->getStackProtectorIndex() == (int)i)
+      continue;
+    if (LargeStackObjs.count(i))
+      continue;
+
+    AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+  }
+
+  // Remember how big this blob of stack space is
+  MFI->setLocalFrameSize(Offset);
+  MFI->setLocalFrameMaxAlign(MaxAlign);
+}
+
+static inline bool
+lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
+                       std::pair<unsigned, int64_t> &RegOffset,
+                       int64_t FrameSizeAdjust,
+                       int64_t LocalFrameOffset,
+                       const MachineInstr *MI,
+                       const TargetRegisterInfo *TRI) {
+  unsigned e = Regs.size();
+  for (unsigned i = 0; i < e; ++i) {
+    RegOffset = Regs[i];
+    // Check if the relative offset from where the base register points to the
+    // target address is in range for the instruction.
+    int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
+    if (TRI->isFrameOffsetLegal(MI, Offset))
+      return true;
+  }
+  return false;
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+  // Scan the function's instructions looking for frame index references.
+  // For each, ask the target if it wants a virtual base register for it
+  // based on what we can tell it about where the local will end up in the
+  // stack frame. If it wants one, re-use a suitable one we've previously
+  // allocated, or if there isn't one that fits the bill, allocate a new one
+  // and ask the target to create a defining instruction for it.
+  bool UsedBaseReg = false;
+
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+  const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+  bool StackGrowsDown =
+    TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+  // Collect all of the instructions in the block that reference
+  // a frame index. Also store the frame index referenced to ease later
+  // lookup. (For any insn that has more than one FI reference, we arbitrarily
+  // choose the first one).
+  SmallVector<FrameRef, 64> FrameReferenceInsns;
+
+  // A base register definition is a register + offset pair.
+  SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+      MachineInstr *MI = I;
+
+      // Debug value instructions can't be out of range, so they don't need
+      // any updates.
+      if (MI->isDebugValue())
+        continue;
+
+      // For now, allocate the base register(s) within the basic block
+      // where they're used, and don't try to keep them around outside
+      // of that. It may be beneficial to try sharing them more broadly
+      // than that, but the increased register pressure makes that a
+      // tricky thing to balance. Investigate if re-materializing these
+      // becomes an issue.
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        // Consider replacing all frame index operands that reference
+        // an object allocated in the local block.
+        if (MI->getOperand(i).isFI()) {
+          // Don't try this with values not in the local block.
+          if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+            break;
+          FrameReferenceInsns.
+ push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()])); + break; + } + } + } + } + + // Sort the frame references by local offset + array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); + + MachineBasicBlock *Entry = Fn.begin(); + + // Loop through the frame references and allocate for them as necessary. + for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) { + MachineBasicBlock::iterator I = + FrameReferenceInsns[ref].getMachineInstr(); + MachineInstr *MI = I; + for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) { + // Consider replacing all frame index operands that reference + // an object allocated in the local block. + if (MI->getOperand(idx).isFI()) { + int FrameIdx = MI->getOperand(idx).getIndex(); + + assert(MFI->isObjectPreAllocated(FrameIdx) && + "Only pre-allocated locals expected!"); + + DEBUG(dbgs() << "Considering: " << *MI); + if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) { + unsigned BaseReg = 0; + int64_t Offset = 0; + int64_t FrameSizeAdjust = + StackGrowsDown ? MFI->getLocalFrameSize() : 0; + + DEBUG(dbgs() << " Replacing FI in: " << *MI); + + // If we have a suitable base register available, use it; otherwise + // create a new one. Note that any offset encoded in the + // instruction itself will be taken into account by the target, + // so we don't have to adjust for it here when reusing a base + // register. + std::pair<unsigned, int64_t> RegOffset; + if (lookupCandidateBaseReg(BaseRegisters, RegOffset, + FrameSizeAdjust, + LocalOffsets[FrameIdx], + MI, TRI)) { + DEBUG(dbgs() << " Reusing base register " << + RegOffset.first << "\n"); + // We found a register to reuse. + BaseReg = RegOffset.first; + Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] - + RegOffset.second; + } else { + // No previously defined register was in range, so create a + // new one. + int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); + BaseReg = Fn.getRegInfo().createVirtualRegister(RC); + + DEBUG(dbgs() << " Materializing base register " << BaseReg << + " at frame local offset " << + LocalOffsets[FrameIdx] + InstrOffset << "\n"); + + // Tell the target to insert the instruction to initialize + // the base register. + // MachineBasicBlock::iterator InsertionPt = Entry->begin(); + TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, + InstrOffset); + + // The base register already includes any offset specified + // by the instruction, so account for that so it doesn't get + // applied twice. + Offset = -InstrOffset; + + int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] + + InstrOffset; + BaseRegisters.push_back( + std::pair<unsigned, int64_t>(BaseReg, BaseOffset)); + ++NumBaseRegisters; + UsedBaseReg = true; + } + assert(BaseReg != 0 && "Unable to allocate virtual base register!"); + + // Modify the instruction to use the new base register rather + // than the frame index operand. 
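+          // (After the rewrite, an access that was conceptually "FI#n + imm"
+          // becomes "BaseReg + Offset"; the exact operand forms are
+          // target-specific, so this is only a sketch of the effect.)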
+ TRI->resolveFrameIndex(I, BaseReg, Offset); + DEBUG(dbgs() << "Resolved: " << *MI); + + ++NumReplacements; + } + } + } + } + return UsedBaseReg; +} diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp new file mode 100644 index 0000000..898e165 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -0,0 +1,1181 @@ +//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect the sequence of machine instructions for a basic block. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/LeakDetector.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +using namespace llvm; + +MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) + : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false), + AddressTaken(false) { + Insts.Parent = this; +} + +MachineBasicBlock::~MachineBasicBlock() { + LeakDetector::removeGarbageObject(this); +} + +/// getSymbol - Return the MCSymbol for this basic block. +/// +MCSymbol *MachineBasicBlock::getSymbol() const { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); + return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + + Twine(MF->getFunctionNumber()) + "_" + + Twine(getNumber())); +} + + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { + MBB.print(OS); + return OS; +} + +/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the +/// parent pointer of the MBB, the MBB numbering, and any instructions in the +/// MBB to be on the right operand list for registers. +/// +/// MBBs start out as #-1. When a MBB is added to a MachineFunction, it +/// gets the next available unique MBB number. If it is removed from a +/// MachineFunction, it goes back to being #-1. +void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { + MachineFunction &MF = *N->getParent(); + N->Number = MF.addToMBBNumbering(N); + + // Make sure the instructions have their operands in the reginfo lists. 
+ MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (MachineBasicBlock::instr_iterator + I = N->instr_begin(), E = N->instr_end(); I != E; ++I) + I->AddRegOperandsToUseLists(RegInfo); + + LeakDetector::removeGarbageObject(N); +} + +void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { + N->getParent()->removeFromMBBNumbering(N->Number); + N->Number = -1; + LeakDetector::addGarbageObject(N); +} + + +/// addNodeToList (MI) - When we add an instruction to a basic block +/// list, we update its parent pointer and add its operands from reg use/def +/// lists if appropriate. +void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { + assert(N->getParent() == 0 && "machine instruction already in a basic block"); + N->setParent(Parent); + + // Add the instruction's register operands to their corresponding + // use/def lists. + MachineFunction *MF = Parent->getParent(); + N->AddRegOperandsToUseLists(MF->getRegInfo()); + + LeakDetector::removeGarbageObject(N); +} + +/// removeNodeFromList (MI) - When we remove an instruction from a basic block +/// list, we update its parent pointer and remove its operands from reg use/def +/// lists if appropriate. +void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { + assert(N->getParent() != 0 && "machine instruction not in a basic block"); + + // Remove from the use/def lists. + if (MachineFunction *MF = N->getParent()->getParent()) + N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); + + N->setParent(0); + + LeakDetector::addGarbageObject(N); +} + +/// transferNodesFromList (MI) - When moving a range of instructions from one +/// MBB list to another, we need to update the parent pointers and the use/def +/// lists. +void ilist_traits<MachineInstr>:: +transferNodesFromList(ilist_traits<MachineInstr> &fromList, + ilist_iterator<MachineInstr> first, + ilist_iterator<MachineInstr> last) { + assert(Parent->getParent() == fromList.Parent->getParent() && + "MachineInstr parent mismatch!"); + + // Splice within the same MBB -> no change. + if (Parent == fromList.Parent) return; + + // If splicing between two blocks within the same function, just update the + // parent pointers. + for (; first != last; ++first) + first->setParent(Parent); +} + +void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { + assert(!MI->getParent() && "MI is still in a block!"); + Parent->getParent()->DeleteMachineInstr(MI); +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { + instr_iterator I = instr_begin(), E = instr_end(); + while (I != E && I->isPHI()) + ++I; + assert((I == E || !I->isInsideBundle()) && + "First non-phi MI cannot be inside a bundle!"); + return I; +} + +MachineBasicBlock::iterator +MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { + iterator E = end(); + while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue())) + ++I; + // FIXME: This needs to change if we wish to bundle labels / dbg_values + // inside the bundle. 
+ assert((I == E || !I->isInsideBundle()) && + "First non-phi / non-label instruction is inside a bundle!"); + return I; +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { + iterator B = begin(), E = end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::const_iterator +MachineBasicBlock::getFirstTerminator() const { + const_iterator B = begin(), E = end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { + instr_iterator B = instr_begin(), E = instr_end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { + // Skip over end-of-block dbg_value instructions. + instr_iterator B = instr_begin(), I = instr_end(); + while (I != B) { + --I; + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) + continue; + return I; + } + // The block is all debug values. + return end(); +} + +MachineBasicBlock::const_iterator +MachineBasicBlock::getLastNonDebugInstr() const { + // Skip over end-of-block dbg_value instructions. + const_instr_iterator B = instr_begin(), I = instr_end(); + while (I != B) { + --I; + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) + continue; + return I; + } + // The block is all debug values. + return end(); +} + +const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { + // A block with a landing pad successor only has one other successor. + if (succ_size() > 2) + return 0; + for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) + if ((*I)->isLandingPad()) + return *I; + return 0; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineBasicBlock::dump() const { + print(dbgs()); +} +#endif + +StringRef MachineBasicBlock::getName() const { + if (const BasicBlock *LBB = getBasicBlock()) + return LBB->getName(); + else + return "(null)"; +} + +/// Return a hopefully unique identifier for this block. 
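+/// For example (illustrative), block number 3 of function "foo" prints as
+/// "foo:BB3" when it has no IR-level block name, and as "foo:entry" when the
+/// originating IR block is named "entry".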
+std::string MachineBasicBlock::getFullName() const { + std::string Name; + if (getParent()) + Name = (getParent()->getName() + ":").str(); + if (getBasicBlock()) + Name += getBasicBlock()->getName(); + else + Name += (Twine("BB") + Twine(getNumber())).str(); + return Name; +} + +void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { + const MachineFunction *MF = getParent(); + if (!MF) { + OS << "Can't print out MachineBasicBlock because parent MachineFunction" + << " is null\n"; + return; + } + + if (Indexes) + OS << Indexes->getMBBStartIdx(this) << '\t'; + + OS << "BB#" << getNumber() << ": "; + + const char *Comma = ""; + if (const BasicBlock *LBB = getBasicBlock()) { + OS << Comma << "derived from LLVM BB "; + WriteAsOperand(OS, LBB, /*PrintType=*/false); + Comma = ", "; + } + if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } + if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } + if (Alignment) + OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) + << " bytes)"; + + OS << '\n'; + + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (!livein_empty()) { + if (Indexes) OS << '\t'; + OS << " Live Ins:"; + for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) + OS << ' ' << PrintReg(*I, TRI); + OS << '\n'; + } + // Print the preds of this block according to the CFG. + if (!pred_empty()) { + if (Indexes) OS << '\t'; + OS << " Predecessors according to CFG:"; + for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) + OS << " BB#" << (*PI)->getNumber(); + OS << '\n'; + } + + for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) { + if (Indexes) { + if (Indexes->hasIndex(I)) + OS << Indexes->getInstructionIndex(I); + OS << '\t'; + } + OS << '\t'; + if (I->isInsideBundle()) + OS << " * "; + I->print(OS, &getParent()->getTarget()); + } + + // Print the successors of this block according to the CFG. + if (!succ_empty()) { + if (Indexes) OS << '\t'; + OS << " Successors according to CFG:"; + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { + OS << " BB#" << (*SI)->getNumber(); + if (!Weights.empty()) + OS << '(' << *getWeightIterator(SI) << ')'; + } + OS << '\n'; + } +} + +void MachineBasicBlock::removeLiveIn(unsigned Reg) { + std::vector<unsigned>::iterator I = + std::find(LiveIns.begin(), LiveIns.end(), Reg); + if (I != LiveIns.end()) + LiveIns.erase(I); +} + +bool MachineBasicBlock::isLiveIn(unsigned Reg) const { + livein_iterator I = std::find(livein_begin(), livein_end(), Reg); + return I != livein_end(); +} + +void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) { + getParent()->splice(NewAfter, this); +} + +void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { + MachineFunction::iterator BBI = NewBefore; + getParent()->splice(++BBI, this); +} + +void MachineBasicBlock::updateTerminator() { + const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + // A block with no successors has no concerns with fall-through edges. + if (this->succ_empty()) return; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + DebugLoc dl; // FIXME: this is nowhere + bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); + (void) B; + assert(!B && "UpdateTerminators requires analyzable predecessors!"); + if (Cond.empty()) { + if (TBB) { + // The block has an unconditional branch. If its successor is now + // its layout successor, delete the branch. 
+      if (isLayoutSuccessor(TBB))
+        TII->RemoveBranch(*this);
+    } else {
+      // The block has an unconditional fallthrough. If its successor is not
+      // its layout successor, insert a branch. First we have to locate the
+      // only non-landing-pad successor, as that is the fallthrough block.
+      for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+        if ((*SI)->isLandingPad())
+          continue;
+        assert(!TBB && "Found more than one non-landing-pad successor!");
+        TBB = *SI;
+      }
+
+      // If there is no non-landing-pad successor, the block has no
+      // fall-through edges to be concerned with.
+      if (!TBB)
+        return;
+
+      // Finally update the unconditional successor to be reached via a branch
+      // if it would not be reached by fallthrough.
+      if (!isLayoutSuccessor(TBB))
+        TII->InsertBranch(*this, TBB, 0, Cond, dl);
+    }
+  } else {
+    if (FBB) {
+      // The block has a non-fallthrough conditional branch. If one of its
+      // successors is its layout successor, rewrite it to a fallthrough
+      // conditional branch.
+      if (isLayoutSuccessor(TBB)) {
+        if (TII->ReverseBranchCondition(Cond))
+          return;
+        TII->RemoveBranch(*this);
+        TII->InsertBranch(*this, FBB, 0, Cond, dl);
+      } else if (isLayoutSuccessor(FBB)) {
+        TII->RemoveBranch(*this);
+        TII->InsertBranch(*this, TBB, 0, Cond, dl);
+      }
+    } else {
+      // Walk through the successors and find the successor that is neither a
+      // landing pad nor the conditional branch destination (in TBB); that one
+      // is the fallthrough successor.
+      MachineBasicBlock *FallthroughBB = 0;
+      for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+        if ((*SI)->isLandingPad() || *SI == TBB)
+          continue;
+        assert(!FallthroughBB && "Found more than one fallthrough successor.");
+        FallthroughBB = *SI;
+      }
+      if (!FallthroughBB && canFallThrough()) {
+        // We fall through to the same basic block as the conditional jump
+        // targets. Remove the conditional jump, leaving unconditional
+        // fallthrough.
+        // FIXME: This does not seem like a reasonable pattern to support, but it
+        // has been seen in the wild coming out of degenerate ARM test cases.
+        TII->RemoveBranch(*this);
+
+        // Finally update the unconditional successor to be reached via a branch
+        // if it would not be reached by fallthrough.
+        if (!isLayoutSuccessor(TBB))
+          TII->InsertBranch(*this, TBB, 0, Cond, dl);
+        return;
+      }
+
+      // The block has a fallthrough conditional branch.
+      if (isLayoutSuccessor(TBB)) {
+        if (TII->ReverseBranchCondition(Cond)) {
+          // We can't reverse the condition, add an unconditional branch.
+          Cond.clear();
+          TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
+          return;
+        }
+        TII->RemoveBranch(*this);
+        TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
+      } else if (!isLayoutSuccessor(FallthroughBB)) {
+        TII->RemoveBranch(*this);
+        TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl);
+      }
+    }
+  }
+}
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) {
+
+  // If we see a non-zero weight for the first time, it means we are actually
+  // using the Weights list, so we fill all Weights with 0's.
+  if (weight != 0 && Weights.empty())
+    Weights.resize(Successors.size());
+
+  if (weight != 0 || !Weights.empty())
+    Weights.push_back(weight);
+
+  Successors.push_back(succ);
+  succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
+  succ->removePredecessor(this);
+  succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+  assert(I != Successors.end() && "Not a current successor!");
+
+  // If the Weight list is empty, it means we don't use it (disabled
+  // optimization).
+  if (!Weights.empty()) {
+    weight_iterator WI = getWeightIterator(I);
+    Weights.erase(WI);
+  }
+
+  Successors.erase(I);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I) {
+  assert(I != Successors.end() && "Not a current successor!");
+
+  // If the Weight list is empty, it means we don't use it (disabled
+  // optimization).
+  if (!Weights.empty()) {
+    weight_iterator WI = getWeightIterator(I);
+    Weights.erase(WI);
+  }
+
+  (*I)->removePredecessor(this);
+  return Successors.erase(I);
+}
+
+void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
+                                         MachineBasicBlock *New) {
+  if (Old == New)
+    return;
+
+  succ_iterator E = succ_end();
+  succ_iterator NewI = E;
+  succ_iterator OldI = E;
+  for (succ_iterator I = succ_begin(); I != E; ++I) {
+    if (*I == Old) {
+      OldI = I;
+      if (NewI != E)
+        break;
+    }
+    if (*I == New) {
+      NewI = I;
+      if (OldI != E)
+        break;
+    }
+  }
+  assert(OldI != E && "Old is not a successor of this block");
+  Old->removePredecessor(this);
+
+  // If New isn't already a successor, let it take Old's place.
+  if (NewI == E) {
+    New->addPredecessor(this);
+    *OldI = New;
+    return;
+  }
+
+  // New is already a successor.
+  // Update its weight instead of adding a duplicate edge.
+  if (!Weights.empty()) {
+    weight_iterator OldWI = getWeightIterator(OldI);
+    *getWeightIterator(NewI) += *OldWI;
+    Weights.erase(OldWI);
+  }
+  Successors.erase(OldI);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
+  Predecessors.push_back(pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
+  pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred);
+  assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+  Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
+  if (this == fromMBB)
+    return;
+
+  while (!fromMBB->succ_empty()) {
+    MachineBasicBlock *Succ = *fromMBB->succ_begin();
+    uint32_t Weight = 0;
+
+    // If the Weight list is empty, it means we don't use it (disabled
+    // optimization).
+    if (!fromMBB->Weights.empty())
+      Weight = *fromMBB->Weights.begin();
+
+    addSuccessor(Succ, Weight);
+    fromMBB->removeSuccessor(Succ);
+  }
+}
+
+void
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
+  if (this == fromMBB)
+    return;
+
+  while (!fromMBB->succ_empty()) {
+    MachineBasicBlock *Succ = *fromMBB->succ_begin();
+    uint32_t Weight = 0;
+    if (!fromMBB->Weights.empty())
+      Weight = *fromMBB->Weights.begin();
+    addSuccessor(Succ, Weight);
+    fromMBB->removeSuccessor(Succ);
+
+    // Fix up any PHI nodes in the successor.
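+    // A PHI's operands are its def followed by (value, predecessor) pairs, so
+    // the basic-block operands sit at indices 2, 4, 6, ...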
+    for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
+           ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
+      for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.getMBB() == fromMBB)
+          MO.setMBB(this);
+      }
+  }
+}
+
+bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
+  return std::find(pred_begin(), pred_end(), MBB) != pred_end();
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+  return std::find(succ_begin(), succ_end(), MBB) != succ_end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+  MachineFunction::const_iterator I(this);
+  return llvm::next(I) == MachineFunction::const_iterator(MBB);
+}
+
+bool MachineBasicBlock::canFallThrough() {
+  MachineFunction::iterator Fallthrough = this;
+  ++Fallthrough;
+  // If FallthroughBlock is off the end of the function, it can't fall through.
+  if (Fallthrough == getParent()->end())
+    return false;
+
+  // If FallthroughBlock isn't a successor, no fallthrough is possible.
+  if (!isSuccessor(Fallthrough))
+    return false;
+
+  // Analyze the branches, if any, at the end of the block.
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
+    // If we couldn't analyze the branch, examine the last instruction.
+    // If the block doesn't end in a known control barrier, assume fallthrough
+    // is possible. The isPredicated check is needed because this code can be
+    // called during IfConversion, where an instruction which is normally a
+    // Barrier is predicated and thus no longer an actual control barrier.
+    return empty() || !back().isBarrier() || TII->isPredicated(&back());
+  }
+
+  // If there is no branch, control always falls through.
+  if (TBB == 0) return true;
+
+  // If there is some explicit branch to the fallthrough block, it can obviously
+  // reach, even though the branch should get folded to fall through implicitly.
+  if (MachineFunction::iterator(TBB) == Fallthrough ||
+      MachineFunction::iterator(FBB) == Fallthrough)
+    return true;
+
+  // If it's an unconditional branch to some block not the fall through, it
+  // doesn't fall through.
+  if (Cond.empty()) return false;
+
+  // Otherwise, if it is conditional and has no explicit false block, it falls
+  // through.
+  return FBB == 0;
+}
+
+MachineBasicBlock *
+MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+  // it in this generic function.
+  if (Succ->isLandingPad())
+    return NULL;
+
+  MachineFunction *MF = getParent();
+  DebugLoc dl; // FIXME: this is nowhere
+
+  // We may need to update this's terminator, but we can't do that if
+  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
+    return NULL;
+
+  // Avoid bugpoint weirdness: A block may end with a conditional branch but
+  // jumps to the same MBB in either case. We have duplicate CFG edges in that
+  // case that we can't handle. Since this never happens in properly optimized
+  // code, just skip those edges.
+ if (TBB && TBB == FBB) { + DEBUG(dbgs() << "Won't split critical edge after degenerate BB#" + << getNumber() << '\n'); + return NULL; + } + + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); + MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); + DEBUG(dbgs() << "Splitting critical edge:" + " BB#" << getNumber() + << " -- BB#" << NMBB->getNumber() + << " -- BB#" << Succ->getNumber() << '\n'); + + LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>(); + SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>(); + if (LIS) + LIS->insertMBBInMaps(NMBB); + else if (Indexes) + Indexes->insertMBBInMaps(NMBB); + + // On some targets like Mips, branches may kill virtual registers. Make sure + // that LiveVariables is properly updated after updateTerminator replaces the + // terminators. + LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>(); + + // Collect a list of virtual registers killed by the terminators. + SmallVector<unsigned, 4> KilledRegs; + if (LV) + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { + MachineInstr *MI = I; + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + if (!OI->isReg() || OI->getReg() == 0 || + !OI->isUse() || !OI->isKill() || OI->isUndef()) + continue; + unsigned Reg = OI->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg) || + LV->getVarInfo(Reg).removeKill(MI)) { + KilledRegs.push_back(Reg); + DEBUG(dbgs() << "Removing terminator kill: " << *MI); + OI->setIsKill(false); + } + } + } + + SmallVector<unsigned, 4> UsedRegs; + if (LIS) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { + MachineInstr *MI = I; + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + if (!OI->isReg() || OI->getReg() == 0) + continue; + + unsigned Reg = OI->getReg(); + if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end()) + UsedRegs.push_back(Reg); + } + } + } + + ReplaceUsesOfBlockWith(Succ, NMBB); + + // If updateTerminator() removes instructions, we need to remove them from + // SlotIndexes. + SmallVector<MachineInstr*, 4> Terminators; + if (Indexes) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) + Terminators.push_back(I); + } + + updateTerminator(); + + if (Indexes) { + SmallVector<MachineInstr*, 4> NewTerminators; + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) + NewTerminators.push_back(I); + + for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(), + E = Terminators.end(); I != E; ++I) { + if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) == + NewTerminators.end()) + Indexes->removeMachineInstrFromMaps(*I); + } + } + + // Insert unconditional "jump Succ" instruction in NMBB if necessary. + NMBB->addSuccessor(Succ); + if (!NMBB->isLayoutSuccessor(Succ)) { + Cond.clear(); + MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl); + + if (Indexes) { + for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); + I != E; ++I) { + // Some instructions may have been moved to NMBB by updateTerminator(), + // so we first remove any instruction that already has an index. 
+ if (Indexes->hasIndex(I)) + Indexes->removeMachineInstrFromMaps(I); + Indexes->insertMachineInstrInMaps(I); + } + } + } + + // Fix PHI nodes in Succ so they refer to NMBB instead of this + for (MachineBasicBlock::instr_iterator + i = Succ->instr_begin(),e = Succ->instr_end(); + i != e && i->isPHI(); ++i) + for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) + if (i->getOperand(ni+1).getMBB() == this) + i->getOperand(ni+1).setMBB(NMBB); + + // Inherit live-ins from the successor + for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(), + E = Succ->livein_end(); I != E; ++I) + NMBB->addLiveIn(*I); + + // Update LiveVariables. + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (LV) { + // Restore kills of virtual registers that were killed by the terminators. + while (!KilledRegs.empty()) { + unsigned Reg = KilledRegs.pop_back_val(); + for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { + if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + LV->getVarInfo(Reg).Kills.push_back(I); + DEBUG(dbgs() << "Restored terminator kill: " << *I); + break; + } + } + // Update relevant live-through information. + LV->addNewBlock(NMBB, this, Succ); + } + + if (LIS) { + // After splitting the edge and updating SlotIndexes, live intervals may be + // in one of two situations, depending on whether this block was the last in + // the function. If the original block was the last in the function, all live + // intervals will end prior to the beginning of the new split block. If the + // original block was not at the end of the function, all live intervals will + // extend to the end of the new split block. + + bool isLastMBB = + llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end(); + + SlotIndex StartIndex = Indexes->getMBBEndIdx(this); + SlotIndex PrevIndex = StartIndex.getPrevSlot(); + SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB); + + // Find the registers used from NMBB in PHIs in Succ. + SmallSet<unsigned, 8> PHISrcRegs; + for (MachineBasicBlock::instr_iterator + I = Succ->instr_begin(), E = Succ->instr_end(); + I != E && I->isPHI(); ++I) { + for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) { + if (I->getOperand(ni+1).getMBB() == NMBB) { + MachineOperand &MO = I->getOperand(ni); + unsigned Reg = MO.getReg(); + PHISrcRegs.insert(Reg); + if (MO.isUndef()) + continue; + + LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI = LI.getVNInfoAt(PrevIndex); + assert(VNI && "PHI sources should be live out of their predecessors."); + LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + } + } + } + + MachineRegisterInfo *MRI = &getParent()->getRegInfo(); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg)) + continue; + + LiveInterval &LI = LIS->getInterval(Reg); + if (!LI.liveAt(PrevIndex)) + continue; + + bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ)); + if (isLiveOut && isLastMBB) { + VNInfo *VNI = LI.getVNInfoAt(PrevIndex); + assert(VNI && "LiveInterval should have VNInfo where it is live."); + LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + } else if (!isLiveOut && !isLastMBB) { + LI.removeRange(StartIndex, EndIndex); + } + } + + // Update all intervals for registers whose uses may have been modified by + // updateTerminator(). 
+    LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
+  }
+
+  if (MachineDominatorTree *MDT =
+      P->getAnalysisIfAvailable<MachineDominatorTree>()) {
+    // Update dominator information.
+    MachineDomTreeNode *SuccDTNode = MDT->getNode(Succ);
+
+    bool IsNewIDom = true;
+    for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
+         PI != E; ++PI) {
+      MachineBasicBlock *PredBB = *PI;
+      if (PredBB == NMBB)
+        continue;
+      if (!MDT->dominates(SuccDTNode, MDT->getNode(PredBB))) {
+        IsNewIDom = false;
+        break;
+      }
+    }
+
+    // We know "this" dominates the newly created basic block.
+    MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
+
+    // If all the other predecessors of "Succ" are dominated by "Succ" itself
+    // then the new block is the new immediate dominator of "Succ". Otherwise,
+    // the new block doesn't dominate anything.
+    if (IsNewIDom)
+      MDT->changeImmediateDominator(SuccDTNode, NewDTNode);
+  }
+
+  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
+    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+      // If one block or the other was not in a loop, the new block is not in
+      // a loop either, and thus LI doesn't need to be updated.
+      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+        if (TIL == DestLoop) {
+          // Both are in the same loop; NMBB joins that loop.
+          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else if (TIL->contains(DestLoop)) {
+          // Edge from an outer loop to an inner loop. Add to the outer loop.
+          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else if (DestLoop->contains(TIL)) {
+          // Edge from an inner loop to an outer loop. Add to the outer loop.
+          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else {
+          // Edge from two loops with no containment relation. Because these
+          // are natural loops, we know that the destination block must be the
+          // header of its loop (adding a branch into a loop elsewhere would
+          // create an irreducible loop).
+          assert(DestLoop->getHeader() == Succ &&
+                 "Should not create irreducible loops!");
+          if (MachineLoop *P = DestLoop->getParentLoop())
+            P->addBasicBlockToLoop(NMBB, MLI->getBase());
+        }
+      }
+    }
+
+  return NMBB;
+}
+
+/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
+/// neighboring instructions so the bundle won't be broken by removing MI.
+static void unbundleSingleMI(MachineInstr *MI) {
+  // Removing the first instruction in a bundle.
+  if (MI->isBundledWithSucc() && !MI->isBundledWithPred())
+    MI->unbundleFromSucc();
+  // Removing the last instruction in a bundle.
+  if (MI->isBundledWithPred() && !MI->isBundledWithSucc())
+    MI->unbundleFromPred();
+  // If MI is not bundled, or if it is internal to a bundle, the neighbor flags
+  // are already fine.
+}
+
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) {
+  unbundleSingleMI(I);
+  return Insts.erase(I);
+}
+
+MachineInstr *MachineBasicBlock::remove_instr(MachineInstr *MI) {
+  unbundleSingleMI(MI);
+  MI->clearFlag(MachineInstr::BundledPred);
+  MI->clearFlag(MachineInstr::BundledSucc);
+  return Insts.remove(MI);
+}
+
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) {
+  assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+         "Cannot insert instruction with bundle flags");
+  // Set the bundle flags when inserting inside a bundle.
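+  // The insertion point is inside a bundle exactly when the instruction at I
+  // is bundled with its predecessor; MI must then be tied to both neighbors.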
+  if (I != instr_end() && I->isBundledWithPred()) {
+    MI->setFlag(MachineInstr::BundledPred);
+    MI->setFlag(MachineInstr::BundledSucc);
+  }
+  return Insts.insert(I, MI);
+}
+
+/// removeFromParent - This method unlinks 'this' from the containing function,
+/// and returns it, but does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+  assert(getParent() && "Not embedded in a function!");
+  getParent()->remove(this);
+  return this;
+}
+
+
+/// eraseFromParent - This method unlinks 'this' from the containing function,
+/// and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+  assert(getParent() && "Not embedded in a function!");
+  getParent()->erase(this);
+}
+
+
+/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+/// 'Old', change the code and CFG so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+                                               MachineBasicBlock *New) {
+  assert(Old != New && "Cannot replace self with self!");
+
+  MachineBasicBlock::instr_iterator I = instr_end();
+  while (I != instr_begin()) {
+    --I;
+    if (!I->isTerminator()) break;
+
+    // Scan the operands of this machine instruction, replacing any uses of Old
+    // with New.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+      if (I->getOperand(i).isMBB() &&
+          I->getOperand(i).getMBB() == Old)
+        I->getOperand(i).setMBB(New);
+  }
+
+  // Update the successor information.
+  replaceSuccessor(Old, New);
+}
+
+/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
+/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
+/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
+/// null.
+///
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+                                             MachineBasicBlock *DestB,
+                                             bool isCond) {
+  // The values of DestA and DestB frequently come from a call to the
+  // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial
+  // values from there.
+  //
+  // 1. If both DestA and DestB are null, then the block ends with no branches
+  //    (it falls through to its successor).
+  // 2. If DestA is set, DestB is null, and isCond is false, then the block ends
+  //    with only an unconditional branch.
+  // 3. If DestA is set, DestB is null, and isCond is true, then the block ends
+  //    with a conditional branch that falls through to a successor (DestB).
+  // 4. If DestA and DestB are set and isCond is true, then the block ends with a
+  //    conditional branch followed by an unconditional branch. DestA is the
+  //    'true' destination and DestB is the 'false' destination.
+
+  bool Changed = false;
+
+  MachineFunction::iterator FallThru =
+    llvm::next(MachineFunction::iterator(this));
+
+  if (DestA == 0 && DestB == 0) {
+    // Block falls through to successor.
+    DestA = FallThru;
+    DestB = FallThru;
+  } else if (DestA != 0 && DestB == 0) {
+    if (isCond)
+      // Block ends in conditional jump that falls through to successor.
+      DestB = FallThru;
+  } else {
+    assert(DestA && DestB && isCond &&
+           "CFG in a bad state. Cannot correct CFG edges");
+  }
+
+  // Remove superfluous edges. I.e., those which aren't destinations of this
+  // basic block, duplicate edges, or landing pads.
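+  // An edge survives only the first time its target is seen, and only if the
+  // target is DestA, DestB, or a landing pad.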
+  SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs;
+  MachineBasicBlock::succ_iterator SI = succ_begin();
+  while (SI != succ_end()) {
+    const MachineBasicBlock *MBB = *SI;
+    if (!SeenMBBs.insert(MBB) ||
+        (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
+      // This is a superfluous edge, remove it.
+      SI = removeSuccessor(SI);
+      Changed = true;
+    } else {
+      ++SI;
+    }
+  }
+
+  return Changed;
+}
+
+/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
+/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
+  DebugLoc DL;
+  instr_iterator E = instr_end();
+  if (MBBI == E)
+    return DL;
+
+  // Skip debug declarations, we don't want a DebugLoc from them.
+  while (MBBI != E && MBBI->isDebugValue())
+    ++MBBI;
+  if (MBBI != E)
+    DL = MBBI->getDebugLoc();
+  return DL;
+}
+
+/// getSuccWeight - Return weight of the edge from this block to MBB.
+///
+uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
+  if (Weights.empty())
+    return 0;
+
+  return *getWeightIterator(Succ);
+}
+
+/// getWeightIterator - Return weight iterator corresponding to the I successor
+/// iterator
+MachineBasicBlock::weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::succ_iterator I) {
+  assert(Weights.size() == Successors.size() && "Async weight list!");
+  size_t index = std::distance(Successors.begin(), I);
+  assert(index < Weights.size() && "Not a current successor!");
+  return Weights.begin() + index;
+}
+
+/// getWeightIterator - Return weight iterator corresponding to the I successor
+/// iterator
+MachineBasicBlock::const_weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
+  assert(Weights.size() == Successors.size() && "Async weight list!");
+  const size_t index = std::distance(Successors.begin(), I);
+  assert(index < Weights.size() && "Not a current successor!");
+  return Weights.begin() + index;
+}
+
+/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
+/// as of just before "MI".
+///
+/// Search is localized to a neighborhood of
+/// Neighborhood instructions before (searching for defs or kills) and
+/// Neighborhood instructions after (searching just for defs) MI.
+MachineBasicBlock::LivenessQueryResult
+MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
+                                           unsigned Reg, MachineInstr *MI,
+                                           unsigned Neighborhood) {
+  unsigned N = Neighborhood;
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // Start by searching backwards from MI, looking for kills, reads or defs.
+
+  MachineBasicBlock::iterator I(MI);
+  // If this is the first insn in the block, don't search backwards.
+  if (I != MBB->begin()) {
+    do {
+      --I;
+
+      MachineOperandIteratorBase::PhysRegInfo Analysis =
+        MIOperands(I).analyzePhysReg(Reg, TRI);
+
+      if (Analysis.Defines)
+        // Outputs happen after inputs so they take precedence if both are
+        // present.
+        return Analysis.DefinesDead ? LQR_Dead : LQR_Live;
+
+      if (Analysis.Kills || Analysis.Clobbers)
+        // Register killed, so isn't live.
+        return LQR_Dead;
+
+      else if (Analysis.ReadsOverlap)
+        // Defined or read without a previous kill - live.
+        return Analysis.Reads ? LQR_Live : LQR_OverlappingLive;
+
+    } while (I != MBB->begin() && --N > 0);
+  }
+
+  // Did we get to the start of the block?
+  if (I == MBB->begin()) {
+    // If so, the register's state is definitely defined by the live-in state.
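+    // Check Reg and all of its aliases: an exact live-in match means Reg
+    // itself is live, while a live alias only overlaps Reg.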
+    for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true);
+         RAI.isValid(); ++RAI) {
+      if (MBB->isLiveIn(*RAI))
+        return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive;
+    }
+
+    return LQR_Dead;
+  }
+
+  N = Neighborhood;
+
+  // Try searching forwards from MI, looking for reads or defs.
+  I = MachineBasicBlock::iterator(MI);
+  // If this is the last insn in the block, don't search forwards.
+  if (I != MBB->end()) {
+    for (++I; I != MBB->end() && N > 0; ++I, --N) {
+      MachineOperandIteratorBase::PhysRegInfo Analysis =
+        MIOperands(I).analyzePhysReg(Reg, TRI);
+
+      if (Analysis.ReadsOverlap)
+        // Used, therefore must have been live.
+        return (Analysis.Reads) ?
+          LQR_Live : LQR_OverlappingLive;
+
+      else if (Analysis.Clobbers || Analysis.Defines)
+        // Defined (but not read) therefore cannot have been live.
+        return LQR_Dead;
+    }
+  }
+
+  // At this point we have no idea of the liveness of the register.
+  return LQR_Unknown;
+}
+
+void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
+                          bool t) {
+  OS << "BB#" << MBB->getNumber();
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
new file mode 100644
index 0000000..070daf2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -0,0 +1,61 @@
+//====----- MachineBlockFrequencyInfo.cpp - Machine Block Frequency Analysis ----====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
+                      "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
+                    "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequencyInfo::ID = 0;
+
+
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() : MachineFunctionPass(ID) {
+  initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+  MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
+                                MachineBranchProbabilityInfo>();
+}
+
+MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {
+  delete MBFI;
+}
+
+void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineBranchProbabilityInfo>();
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
+  MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+  MBFI->doFunction(&F, &MBPI);
+  return false;
+}
+
+/// getBlockFreq - Return block frequency. Return 0 if we don't have the
+/// information. Please note that initial frequency is equal to 1024. It means
+/// that we should not rely on the value itself, but only on the comparison to
+/// the other block frequencies. We do this to avoid using floating point.
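+/// For example, callers should only compare the returned values:
+///   if (MBFI.getBlockFreq(A) >= MBFI.getBlockFreq(B))
+///     ... // A is at least as hot as B.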
+///
+BlockFrequency MachineBlockFrequencyInfo::
+getBlockFreq(const MachineBasicBlock *MBB) const {
+  return MBFI->getBlockFreq(MBB);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
new file mode 100644
index 0000000..cd948e2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -0,0 +1,1165 @@
+//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations using the CFG
+// structure and branch probability estimates.
+//
+// The pass strives to preserve the structure of the CFG (that is, retain
+// a topological ordering of basic blocks) in the absence of a *strong* signal
+// to the contrary from probabilities. However, within the CFG structure, it
+// attempts to choose an ordering which favors placing more likely sequences of
+// blocks adjacent to each other.
+//
+// The algorithm works from the inner-most loop within a function outward, and
+// at each stage walks through the basic blocks, trying to coalesce them into
+// sequential chains where allowed by the CFG (or demanded by heavy
+// probabilities). Finally, it walks the blocks in topological order, and the
+// first time it reaches a chain of basic blocks, it schedules them in the
+// function in-order.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "block-placement2"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCondBranches, "Number of conditional branches");
+STATISTIC(NumUncondBranches, "Number of unconditional branches");
+STATISTIC(CondBranchTakenFreq,
+          "Potential frequency of taking conditional branches");
+STATISTIC(UncondBranchTakenFreq,
+          "Potential frequency of taking unconditional branches");
+
+namespace {
+class BlockChain;
+/// \brief Type for our function-wide basic block -> block chain mapping.
+typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
+}
+
+namespace {
+/// \brief A chain of blocks which will be laid out contiguously.
+///
+/// This is the datastructure representing a chain of consecutive blocks that
+/// are profitable to layout together in order to maximize fallthrough
+/// probabilities and code locality. We also can use a block chain to represent
+/// a sequence of basic blocks which have some external (correctness)
+/// requirement for sequential layout.
+///
+/// Chains can be built around a single basic block and can be merged to grow
+/// them.
They participate in a block-to-chain mapping, which is updated +/// automatically as chains are merged together. +class BlockChain { + /// \brief The sequence of blocks belonging to this chain. + /// + /// This is the sequence of blocks for a particular chain. These will be laid + /// out in-order within the function. + SmallVector<MachineBasicBlock *, 4> Blocks; + + /// \brief A handle to the function-wide basic block to block chain mapping. + /// + /// This is retained in each block chain to simplify the computation of child + /// block chains for SCC-formation and iteration. We store the edges to child + /// basic blocks, and map them back to their associated chains using this + /// structure. + BlockToChainMapType &BlockToChain; + +public: + /// \brief Construct a new BlockChain. + /// + /// This builds a new block chain representing a single basic block in the + /// function. It also registers itself as the chain that block participates + /// in with the BlockToChain mapping. + BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { + assert(BB && "Cannot create a chain with a null basic block"); + BlockToChain[BB] = this; + } + + /// \brief Iterator over blocks within the chain. + typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator; + + /// \brief Beginning of blocks within the chain. + iterator begin() { return Blocks.begin(); } + + /// \brief End of blocks within the chain. + iterator end() { return Blocks.end(); } + + /// \brief Merge a block chain into this one. + /// + /// This routine merges a block chain into this one. It takes care of forming + /// a contiguous sequence of basic blocks, updating the edge list, and + /// updating the block -> chain mapping. It does not free or tear down the + /// old chain, but the old chain's block list is no longer valid. + void merge(MachineBasicBlock *BB, BlockChain *Chain) { + assert(BB); + assert(!Blocks.empty()); + + // Fast path in case we don't have a chain already. + if (!Chain) { + assert(!BlockToChain[BB]); + Blocks.push_back(BB); + BlockToChain[BB] = this; + return; + } + + assert(BB == *Chain->begin()); + assert(Chain->begin() != Chain->end()); + + // Update the incoming blocks to point to this chain, and add them to the + // chain structure. + for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); + BI != BE; ++BI) { + Blocks.push_back(*BI); + assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain"); + BlockToChain[*BI] = this; + } + } + +#ifndef NDEBUG + /// \brief Dump the blocks in this chain. + void dump() LLVM_ATTRIBUTE_USED { + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->dump(); + } +#endif // NDEBUG + + /// \brief Count of predecessors within the loop currently being processed. + /// + /// This count is updated at each loop we process to represent the number of + /// in-loop predecessors of this chain. + unsigned LoopPredecessors; +}; +} + +namespace { +class MachineBlockPlacement : public MachineFunctionPass { + /// \brief A typedef for a block filter set. + typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet; + + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. + const MachineBlockFrequencyInfo *MBFI; + + /// \brief A handle to the loop info. + const MachineLoopInfo *MLI; + + /// \brief A handle to the target's instruction info. 
+ const TargetInstrInfo *TII; + + /// \brief A handle to the target's lowering info. + const TargetLoweringBase *TLI; + + /// \brief Allocator and owner of BlockChain structures. + /// + /// We build BlockChains lazily while processing the loop structure of + /// a function. To reduce malloc traffic, we allocate them using this + /// slab-like allocator, and destroy them after the pass completes. An + /// important guarantee is that this allocator produces stable pointers to + /// the chains. + SpecificBumpPtrAllocator<BlockChain> ChainAllocator; + + /// \brief Function wide BasicBlock to BlockChain mapping. + /// + /// This mapping allows efficiently moving from any given basic block to the + /// BlockChain it participates in, if any. We use it to, among other things, + /// allow implicitly defining edges between chains as the existing edges + /// between basic blocks. + DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; + + void markChainSuccessors(BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter); + MachineBasicBlock *selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter); + MachineBasicBlock *getFirstUnplacedBlock( + MachineFunction &F, + const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); + void buildChain(MachineBasicBlock *BB, BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *findBestLoopTop(MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopExit(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + void buildLoopChains(MachineFunction &F, MachineLoop &L); + void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet); + void buildCFGChains(MachineFunction &F); + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacement() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char MachineBlockPlacement::ID = 0; +char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", + "Branch Probability Basic Block Placement", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", + "Branch Probability Basic Block Placement", false, false) + +#ifndef NDEBUG +/// \brief Helper to print the name of a MBB. +/// +/// Only used by debug logging. 
+static std::string getBlockName(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber() + << " (derived from LLVM BB '" << BB->getName() << "')"; + OS.flush(); + return Result; +} + +/// \brief Helper to print the number of a MBB. +/// +/// Only used by debug logging. +static std::string getBlockNum(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber(); + OS.flush(); + return Result; +} +#endif + +/// \brief Mark a chain's successors as having one fewer preds. +/// +/// When a chain is being merged into the "placed" chain, this routine will +/// quickly walk the successors of each block in the chain and mark them as +/// having one fewer active predecessor. It also adds any successors of this +/// chain which reach the zero-predecessor state to the worklist passed in. +void MachineBlockPlacement::markChainSuccessors( + BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter) { + // Walk all the blocks in this chain, marking their successors as having + // a predecessor placed. + for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); + CBI != CBE; ++CBI) { + // Add any successors for which this is the only un-placed in-loop + // predecessor to the worklist as a viable candidate for CFG-neutral + // placement. No subsequent placement of this block will violate the CFG + // shape, so we get to use heuristics to choose a favorable placement. + for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), + SE = (*CBI)->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + // Disregard edges within a fixed chain, or edges to the loop header. + if (&Chain == &SuccChain || *SI == LoopHeaderBB) + continue; + + // This is a cross-chain edge that is within the loop, so decrement the + // loop predecessor count of the destination chain. + if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0) + BlockWorkList.push_back(*SuccChain.begin()); + } + } +} + +/// \brief Select the best successor for a block. +/// +/// This looks across all successors of a particular block and attempts to +/// select the "best" one to be the layout successor. It only considers direct +/// successors which also pass the block filter. It will attempt to avoid +/// breaking CFG structure, but cave and break such structures in the case of +/// very hot successor edges. +/// +/// \returns The best successor block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( + MachineBasicBlock *BB, BlockChain &Chain, + const BlockFilterSet *BlockFilter) { + const BranchProbability HotProb(4, 5); // 80% + + MachineBasicBlock *BestSucc = 0; + // FIXME: Due to the performance of the probability and weight routines in + // the MBPI analysis, we manually compute probabilities using the edge + // weights. This is suboptimal as it means that the somewhat subtle + // definition of edge weight semantics is encoded here as well. We should + // improve the MBPI interface to efficiently support query patterns such as + // this. 
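+  // getSumForBlock scales each raw edge weight down by WeightScale so that
+  // the weights sum to a value representable in 32 bits.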
+  uint32_t BestWeight = 0;
+  uint32_t WeightScale = 0;
+  uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
+  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+                                        SE = BB->succ_end();
+       SI != SE; ++SI) {
+    if (BlockFilter && !BlockFilter->count(*SI))
+      continue;
+    BlockChain &SuccChain = *BlockToChain[*SI];
+    if (&SuccChain == &Chain) {
+      DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Already merged!\n");
+      continue;
+    }
+    if (*SI != *SuccChain.begin()) {
+      DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> Mid chain!\n");
+      continue;
+    }
+
+    uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+    BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+
+    // Only consider successors which are either "hot", or wouldn't violate
+    // any CFG constraints.
+    if (SuccChain.LoopPredecessors != 0) {
+      if (SuccProb < HotProb) {
+        DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> CFG conflict\n");
+        continue;
+      }
+
+      // Make sure that a hot successor doesn't have a globally more important
+      // predecessor.
+      BlockFrequency CandidateEdgeFreq
+        = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+      bool BadCFGConflict = false;
+      for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
+                                            PE = (*SI)->pred_end();
+           PI != PE; ++PI) {
+        if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
+            BlockToChain[*PI] == &Chain)
+          continue;
+        BlockFrequency PredEdgeFreq
+          = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+        if (PredEdgeFreq >= CandidateEdgeFreq) {
+          BadCFGConflict = true;
+          break;
+        }
+      }
+      if (BadCFGConflict) {
+        DEBUG(dbgs() << "    " << getBlockName(*SI)
+                     << " -> non-cold CFG conflict\n");
+        continue;
+      }
+    }
+
+    DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> " << SuccProb
+                 << " (prob)"
+                 << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
+                 << "\n");
+    if (BestSucc && BestWeight >= SuccWeight)
+      continue;
+    BestSucc = *SI;
+    BestWeight = SuccWeight;
+  }
+  return BestSucc;
+}
+
+namespace {
+/// \brief Predicate struct to detect blocks already placed.
+class IsBlockPlaced {
+  const BlockChain &PlacedChain;
+  const BlockToChainMapType &BlockToChain;
+
+public:
+  IsBlockPlaced(const BlockChain &PlacedChain,
+                const BlockToChainMapType &BlockToChain)
+      : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
+
+  bool operator()(MachineBasicBlock *BB) const {
+    return BlockToChain.lookup(BB) == &PlacedChain;
+  }
+};
+}
+
+/// \brief Select the best block from a worklist.
+///
+/// This looks through the provided worklist as a list of candidate basic
+/// blocks and selects the most profitable one to place. The definition of
+/// profitable only really makes sense in the context of a loop. This returns
+/// the most frequently visited block in the worklist, which in the case of
+/// a loop, is the one most desirable to be physically close to the rest of the
+/// loop body in order to improve icache behavior.
+///
+/// \returns The best block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
+    BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+    const BlockFilterSet *BlockFilter) {
+  // Once we need to walk the worklist looking for a candidate, clean up the
+  // worklist of already placed entries.
+  // FIXME: If this shows up on profiles, it could be folded (at the cost of
+  // some code complexity) into the loop below.
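+  // Erase-remove idiom: compact the worklist in place, dropping any blocks
+  // that were already merged into the placed chain.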
+  WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
+                                IsBlockPlaced(Chain, BlockToChain)),
+                 WorkList.end());
+
+  MachineBasicBlock *BestBlock = 0;
+  BlockFrequency BestFreq;
+  for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
+                                                      WBE = WorkList.end();
+       WBI != WBE; ++WBI) {
+    BlockChain &SuccChain = *BlockToChain[*WBI];
+    if (&SuccChain == &Chain) {
+      DEBUG(dbgs() << "    " << getBlockName(*WBI)
+                   << " -> Already merged!\n");
+      continue;
+    }
+    assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+    BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
+    DEBUG(dbgs() << "    " << getBlockName(*WBI) << " -> " << CandidateFreq
+                 << " (freq)\n");
+    if (BestBlock && BestFreq >= CandidateFreq)
+      continue;
+    BestBlock = *WBI;
+    BestFreq = CandidateFreq;
+  }
+  return BestBlock;
+}
+
+/// \brief Retrieve the first unplaced basic block.
+///
+/// This routine is called when we are unable to use the CFG to walk through
+/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
+/// We walk through the function's blocks in order, starting from the
+/// PrevUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+    MachineFunction &F, const BlockChain &PlacedChain,
+    MachineFunction::iterator &PrevUnplacedBlockIt,
+    const BlockFilterSet *BlockFilter) {
+  for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+       ++I) {
+    if (BlockFilter && !BlockFilter->count(I))
+      continue;
+    if (BlockToChain[I] != &PlacedChain) {
+      PrevUnplacedBlockIt = I;
+      // Now select the head of the chain to which the unplaced block belongs
+      // as the block to place. This will force the entire chain to be placed,
+      // and satisfies the requirements of merging chains.
+      return *BlockToChain[I]->begin();
+    }
+  }
+  return 0;
+}
+
+void MachineBlockPlacement::buildChain(
+    MachineBasicBlock *BB,
+    BlockChain &Chain,
+    SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+    const BlockFilterSet *BlockFilter) {
+  assert(BB);
+  assert(BlockToChain[BB] == &Chain);
+  MachineFunction &F = *BB->getParent();
+  MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
+
+  MachineBasicBlock *LoopHeaderBB = BB;
+  markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
+  BB = *llvm::prior(Chain.end());
+  for (;;) {
+    assert(BB);
+    assert(BlockToChain[BB] == &Chain);
+    assert(*llvm::prior(Chain.end()) == BB);
+
+    // Look for the best viable successor if there is one to place immediately
+    // after this block.
+    MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+
+    // If an immediate successor isn't available, look for the best viable
+    // block among those we've identified as not violating the loop's CFG at
+    // this point. This won't be a fallthrough, but it will increase locality.
+    if (!BestSucc)
+      BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
+
+    if (!BestSucc) {
+      BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
+                                       BlockFilter);
+      if (!BestSucc)
+        break;
+
+      DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+                      "layout successor until the CFG reduces\n");
+    }
+
+    // Place this block, updating the data structures to reflect its
+    // placement.
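+    // BestSucc is always the head of its chain, so merging splices that whole
+    // chain onto ours and redirects its BlockToChain entries to Chain.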
+    BlockChain &SuccChain = *BlockToChain[BestSucc];
+    // Zero out LoopPredecessors for the successor we're about to merge in case
+    // we selected a successor that didn't fit naturally into the CFG.
+    SuccChain.LoopPredecessors = 0;
+    DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
+                 << " to " << getBlockNum(BestSucc) << "\n");
+    markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
+    Chain.merge(BestSucc, &SuccChain);
+    BB = *llvm::prior(Chain.end());
+  }
+
+  DEBUG(dbgs() << "Finished forming chain for header block "
+               << getBlockNum(*Chain.begin()) << "\n");
+}
+
+/// \brief Find the best loop top block for layout.
+///
+/// Look for a block which is strictly better than the loop header for laying
+/// out at the top of the loop. This looks for one and only one pattern:
+/// a latch block with no conditional exit. This block will cause a conditional
+/// jump around it or will be the bottom of the loop if we lay it out in place,
+/// but if it doesn't end up at the bottom of the loop for any reason,
+/// rotation alone won't fix it. Because such a block will always result in an
+/// unconditional jump (for the backedge), rotating it in front of the loop
+/// header is always profitable.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
+                                       const BlockFilterSet &LoopBlockSet) {
+  // Check that the header hasn't been fused with a preheader block due to
+  // crazy branches. If it has, we need to start with the header at the top to
+  // prevent pulling the preheader into the loop body.
+  BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+  if (!LoopBlockSet.count(*HeaderChain.begin()))
+    return L.getHeader();
+
+  DEBUG(dbgs() << "Finding best loop top for: "
+               << getBlockName(L.getHeader()) << "\n");
+
+  BlockFrequency BestPredFreq;
+  MachineBasicBlock *BestPred = 0;
+  for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
+                                        PE = L.getHeader()->pred_end();
+       PI != PE; ++PI) {
+    MachineBasicBlock *Pred = *PI;
+    if (!LoopBlockSet.count(Pred))
+      continue;
+    DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", "
+                 << Pred->succ_size() << " successors, "
+                 << MBFI->getBlockFreq(Pred) << " freq\n");
+    if (Pred->succ_size() > 1)
+      continue;
+
+    BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+    if (!BestPred || PredFreq > BestPredFreq ||
+        (!(PredFreq < BestPredFreq) &&
+         Pred->isLayoutSuccessor(L.getHeader()))) {
+      BestPred = Pred;
+      BestPredFreq = PredFreq;
+    }
+  }
+
+  // If no direct predecessor is fine, just use the loop header.
+  if (!BestPred)
+    return L.getHeader();
+
+  // Walk backwards through any straight line of predecessors.
+  while (BestPred->pred_size() == 1 &&
+         (*BestPred->pred_begin())->succ_size() == 1 &&
+         *BestPred->pred_begin() != L.getHeader())
+    BestPred = *BestPred->pred_begin();
+
+  DEBUG(dbgs() << "    final top: " << getBlockName(BestPred) << "\n");
+  return BestPred;
+}
+
+
+/// \brief Find the best loop exiting block for layout.
+///
+/// This routine implements the logic to analyze the loop looking for the best
+/// exiting block to rotate to the bottom of the loop. Typically this is done
+/// to maximize fallthrough opportunities.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
+                                        MachineLoop &L,
+                                        const BlockFilterSet &LoopBlockSet) {
+  // We don't want to layout the loop linearly in all cases. If the loop header
+  // is just a normal basic block in the loop, we want to look for what block
+  // within the loop is the best one to layout at the top.
However, if the loop
+  // header has been pre-merged into a chain due to predecessors not having
+  // analyzable branches, *and* the predecessor it is merged with is *not* part
+  // of the loop, rotating the header into the middle of the loop will create
+  // a non-contiguous range of blocks which is Very Bad. So start with the
+  // header and only rotate if safe.
+  BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+  if (!LoopBlockSet.count(*HeaderChain.begin()))
+    return 0;
+
+  BlockFrequency BestExitEdgeFreq;
+  unsigned BestExitLoopDepth = 0;
+  MachineBasicBlock *ExitingBB = 0;
+  // If there are exits to outer loops, loop rotation can severely limit
+  // fallthrough opportunities unless it selects such an exit. Keep a set of
+  // blocks where rotating to exit with that block will reach an outer loop.
+  SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
+
+  DEBUG(dbgs() << "Finding best loop exit for: "
+               << getBlockName(L.getHeader()) << "\n");
+  for (MachineLoop::block_iterator I = L.block_begin(),
+                                   E = L.block_end();
+       I != E; ++I) {
+    BlockChain &Chain = *BlockToChain[*I];
+    // Ensure that this block is at the end of a chain; otherwise it could be
+    // mid-way through an inner loop or a successor of an analyzable branch.
+    if (*I != *llvm::prior(Chain.end()))
+      continue;
+
+    // Now walk the successors. We need to establish whether this has a viable
+    // exiting successor and whether it has a viable non-exiting successor.
+    // We store the old exiting state and restore it if a viable looping
+    // successor isn't found.
+    MachineBasicBlock *OldExitingBB = ExitingBB;
+    BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
+    bool HasLoopingSucc = false;
+    // FIXME: Due to the performance of the probability and weight routines in
+    // the MBPI analysis, we use the internal weights and manually compute the
+    // probabilities to avoid quadratic behavior.
+    uint32_t WeightScale = 0;
+    uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
+    for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
+                                          SE = (*I)->succ_end();
+         SI != SE; ++SI) {
+      if ((*SI)->isLandingPad())
+        continue;
+      if (*SI == *I)
+        continue;
+      BlockChain &SuccChain = *BlockToChain[*SI];
+      // Don't split chains, either this chain or the successor's chain.
+      if (&Chain == &SuccChain) {
+        DEBUG(dbgs() << "    exiting: " << getBlockName(*I) << " -> "
+                     << getBlockName(*SI) << " (chain conflict)\n");
+        continue;
+      }
+
+      uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
+      if (LoopBlockSet.count(*SI)) {
+        DEBUG(dbgs() << "    looping: " << getBlockName(*I) << " -> "
+                     << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+        HasLoopingSucc = true;
+        continue;
+      }
+
+      unsigned SuccLoopDepth = 0;
+      if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) {
+        SuccLoopDepth = ExitLoop->getLoopDepth();
+        if (ExitLoop->contains(&L))
+          BlocksExitingToOuterLoop.insert(*I);
+      }
+
+      BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+      BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
+      DEBUG(dbgs() << "    exiting: " << getBlockName(*I) << " -> "
+                   << getBlockName(*SI) << " [L:" << SuccLoopDepth
+                   << "] (" << ExitEdgeFreq << ")\n");
+      // Note that we slightly bias this toward an existing layout successor to
+      // retain incoming order in the absence of better information.
+      // FIXME: Should we bias this more strongly? It's pretty weak.
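+      // The preference order is: an exit reaching a deeper loop, then a more
+      // frequent exit edge, then an existing layout successor on frequency
+      // ties.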
+ if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || + ExitEdgeFreq > BestExitEdgeFreq || + ((*I)->isLayoutSuccessor(*SI) && + !(ExitEdgeFreq < BestExitEdgeFreq))) { + BestExitEdgeFreq = ExitEdgeFreq; + ExitingBB = *I; + } + } + + // Restore the old exiting state, no viable looping successor was found. + if (!HasLoopingSucc) { + ExitingBB = OldExitingBB; + BestExitEdgeFreq = OldBestExitEdgeFreq; + continue; + } + } + // Without a candidate exiting block or with only a single block in the + // loop, just use the loop header to layout the loop. + if (!ExitingBB || L.getNumBlocks() == 1) + return 0; + + // Also, if we have exit blocks which lead to outer loops but didn't select + // one of them as the exiting block we are rotating toward, disable loop + // rotation altogether. + if (!BlocksExitingToOuterLoop.empty() && + !BlocksExitingToOuterLoop.count(ExitingBB)) + return 0; + + DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); + return ExitingBB; +} + +/// \brief Attempt to rotate an exiting block to the bottom of the loop. +/// +/// Once we have built a chain, try to rotate it to line up the hot exit block +/// with fallthrough out of the loop if doing so doesn't introduce unnecessary +/// branches. For example, if the loop has fallthrough into its header and out +/// of its bottom already, don't rotate it. +void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, + MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet) { + if (!ExitingBB) + return; + + MachineBasicBlock *Top = *LoopChain.begin(); + bool ViableTopFallthrough = false; + for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(), + PE = Top->pred_end(); + PI != PE; ++PI) { + BlockChain *PredChain = BlockToChain[*PI]; + if (!LoopBlockSet.count(*PI) && + (!PredChain || *PI == *llvm::prior(PredChain->end()))) { + ViableTopFallthrough = true; + break; + } + } + + // If the header has viable fallthrough, check whether the current loop + // bottom is a viable exiting block. If so, bail out as rotating will + // introduce an unnecessary branch. + if (ViableTopFallthrough) { + MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end()); + for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), + SE = Bottom->succ_end(); + SI != SE; ++SI) { + BlockChain *SuccChain = BlockToChain[*SI]; + if (!LoopBlockSet.count(*SI) && + (!SuccChain || *SI == *SuccChain->begin())) + return; + } + } + + BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), + ExitingBB); + if (ExitIt == LoopChain.end()) + return; + + std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); +} + +/// \brief Forms basic block chains from the natural loop structures. +/// +/// These chains are designed to preserve the existing *structure* of the code +/// as much as possible. We can then stitch the chains together in a way which +/// both preserves the topological structure and minimizes taken conditional +/// branches. +void MachineBlockPlacement::buildLoopChains(MachineFunction &F, + MachineLoop &L) { + // First recurse through any nested loops, building chains for those inner + // loops. + for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) + buildLoopChains(F, **LI); + + SmallVector<MachineBasicBlock *, 16> BlockWorkList; + BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); + + // First check to see if there is an obviously preferable top block for the + // loop. 
This will default to the header, but may end up as one of the + // predecessors to the header if there is one which will result in strictly + // fewer branches in the loop body. + MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); + + // If we selected just the header for the loop top, look for a potentially + // profitable exit block in the event that rotating the loop can eliminate + // branches by placing an exit edge at the bottom. + MachineBasicBlock *ExitingBB = 0; + if (LoopTop == L.getHeader()) + ExitingBB = findBestLoopExit(F, L, LoopBlockSet); + + BlockChain &LoopChain = *BlockToChain[LoopTop]; + + // FIXME: This is a really lame way of walking the chains in the loop: we + // walk the blocks, and use a set to prevent visiting a particular chain + // twice. + SmallPtrSet<BlockChain *, 4> UpdatedPreds; + assert(LoopChain.LoopPredecessors == 0); + UpdatedPreds.insert(&LoopChain); + for (MachineLoop::block_iterator BI = L.block_begin(), + BE = L.block_end(); + BI != BE; ++BI) { + BlockChain &Chain = *BlockToChain[*BI]; + if (!UpdatedPreds.insert(&Chain)) + continue; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*Chain.begin()); + } + + buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); + rotateLoop(LoopChain, ExitingBB, LoopBlockSet); + + DEBUG({ + // Crash at the end so we get all of the debugging output first. + bool BadLoop = false; + if (LoopChain.LoopPredecessors) { + BadLoop = true; + dbgs() << "Loop chain contains a block without its preds placed!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; + } + for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); + BCI != BCE; ++BCI) { + dbgs() << " ... " << getBlockName(*BCI) << "\n"; + if (!LoopBlockSet.erase(*BCI)) { + // We don't mark the loop as bad here because there are real situations + // where this can occur. For example, with an unanalyzable fallthrough + // from a loop block to a non-loop block or vice versa. + dbgs() << "Loop chain contains a block not contained by the loop!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + } + } + + if (!LoopBlockSet.empty()) { + BadLoop = true; + for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), + LBE = LoopBlockSet.end(); + LBI != LBE; ++LBI) + dbgs() << "Loop contains blocks never placed into a chain!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*LBI) << "\n"; + } + assert(!BadLoop && "Detected problems with the placement of this loop."); + }); +} + +void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { + // Ensure that every BB in the function has an associated chain to simplify + // the assumptions of the remaining algorithm. + SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. 
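+  // (Illustrative scenario, not from the original comments: if BB#1 ends in a
+  // terminator AnalyzeBranch cannot model but can still fall through to BB#2,
+  // the loop below pre-merges BB#1 and BB#2 into one chain so that no later
+  // placement decision can separate them and break the implicit fallthrough.)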
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = FI; + BlockChain *Chain + = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + // Also, merge any blocks which we cannot reason about and must preserve + // the exact fallthrough behavior for. + for (;;) { + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) + break; + + MachineFunction::iterator NextFI(llvm::next(FI)); + MachineBasicBlock *NextBB = NextFI; + // Ensure that the layout successor is a viable block, as we know that + // fallthrough is a possibility. + assert(NextFI != FE && "Can't fallthrough past the last block."); + DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " + << getBlockName(BB) << " -> " << getBlockName(NextBB) + << "\n"); + Chain->merge(NextBB, 0); + FI = NextFI; + BB = NextBB; + } + } + + // Build any loop-based chains. + for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; + ++LI) + buildLoopChains(F, **LI); + + SmallVector<MachineBasicBlock *, 16> BlockWorkList; + + SmallPtrSet<BlockChain *, 4> UpdatedPreds; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = &*FI; + BlockChain &Chain = *BlockToChain[BB]; + if (!UpdatedPreds.insert(&Chain)) + continue; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*Chain.begin()); + } + + BlockChain &FunctionChain = *BlockToChain[&F.front()]; + buildChain(&F.front(), FunctionChain, BlockWorkList); + + typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; + DEBUG({ + // Crash at the end so we get all of the debugging output first. + bool BadFunc = false; + FunctionBlockSetType FunctionBlockSet; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + FunctionBlockSet.insert(FI); + + for (BlockChain::iterator BCI = FunctionChain.begin(), + BCE = FunctionChain.end(); + BCI != BCE; ++BCI) + if (!FunctionBlockSet.erase(*BCI)) { + BadFunc = true; + dbgs() << "Function chain contains a block not in the function!\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + } + + if (!FunctionBlockSet.empty()) { + BadFunc = true; + for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(), + FBE = FunctionBlockSet.end(); + FBI != FBE; ++FBI) + dbgs() << "Function contains blocks never placed into a chain!\n" + << " Bad block: " << getBlockName(*FBI) << "\n"; + } + assert(!BadFunc && "Detected problems with the block placement."); + }); + + // Splice the blocks into place. + MachineFunction::iterator InsertPos = F.begin(); + for (BlockChain::iterator BI = FunctionChain.begin(), + BE = FunctionChain.end(); + BI != BE; ++BI) { + DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " + : " ... ") + << getBlockName(*BI) << "\n"); + if (InsertPos != MachineFunction::iterator(*BI)) + F.splice(InsertPos, *BI); + else + ++InsertPos; + + // Update the terminator of the previous block. 
+    if (BI == FunctionChain.begin())
+      continue;
+    MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
+
+    // FIXME: It would be awesome if updateTerminator would just return rather
+    // than assert when the branch cannot be analyzed in order to remove this
+    // boilerplate.
+    Cond.clear();
+    MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+    if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+      // If PrevBB has a two-way branch, try to re-order the branches
+      // such that we branch to the successor with higher weight first.
+      if (TBB && !Cond.empty() && FBB &&
+          MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
+          !TII->ReverseBranchCondition(Cond)) {
+        DEBUG(dbgs() << "Reverse order of the two branches: "
+                     << getBlockName(PrevBB) << "\n");
+        DEBUG(dbgs() << "    Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
+                     << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
+        DebugLoc dl; // FIXME: this is nowhere
+        TII->RemoveBranch(*PrevBB);
+        TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
+      }
+      PrevBB->updateTerminator();
+    }
+  }
+
+  // Fixup the last block.
+  Cond.clear();
+  MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+  if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
+    F.back().updateTerminator();
+
+  // Walk through the backedges of the function now that we have fully laid out
+  // the basic blocks and align the destination of each backedge. We don't rely
+  // exclusively on the loop info here so that we can align backedges in
+  // unnatural CFGs and backedges that were introduced purely because of the
+  // loop rotations done during this layout pass.
+  if (F.getFunction()->getAttributes().
+        hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
+    return;
+  unsigned Align = TLI->getPrefLoopAlignment();
+  if (!Align)
+    return; // Don't care about loop alignment.
+  if (FunctionChain.begin() == FunctionChain.end())
+    return; // Empty chain.
+
+  const BranchProbability ColdProb(1, 5); // 20%
+  BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
+  BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
+  for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
+                            BE = FunctionChain.end();
+       BI != BE; ++BI) {
+    // Don't align non-looping basic blocks. These are unlikely to execute
+    // enough times to matter in practice. Note that we'll still handle
+    // unnatural CFGs inside of a natural outer loop (the common case) and
+    // rotated loops.
+    MachineLoop *L = MLI->getLoopFor(*BI);
+    if (!L)
+      continue;
+
+    // If the block is cold relative to the function entry don't waste space
+    // aligning it.
+    BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+    if (Freq < WeightedEntryFreq)
+      continue;
+
+    // If the block is cold relative to its loop header, don't align it
+    // regardless of what edges into the block exist.
+    MachineBasicBlock *LoopHeader = L->getHeader();
+    BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
+    if (Freq < (LoopHeaderFreq * ColdProb))
+      continue;
+
+    // Check for the existence of a non-layout predecessor which would benefit
+    // from aligning this block.
+    MachineBasicBlock *LayoutPred = *llvm::prior(BI);
+
+    // Force alignment if all the predecessors are jumps. We already checked
+    // that the block isn't cold above.
+    if (!LayoutPred->isSuccessor(*BI)) {
+      (*BI)->setAlignment(Align);
+      continue;
+    }
+
+    // Align this block if the layout predecessor's edge into this block is
+    // cold relative to the block.
When this is true, other predecessors make up + // all of the hot entries into the block and thus alignment is likely to be + // important. + BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; + if (LayoutEdgeFreq <= (Freq * ColdProb)) + (*BI)->setAlignment(Align); + } +} + +bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (llvm::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MLI = &getAnalysis<MachineLoopInfo>(); + TII = F.getTarget().getInstrInfo(); + TLI = F.getTarget().getTargetLowering(); + assert(BlockToChain.empty()); + + buildCFGChains(F); + + BlockToChain.clear(); + ChainAllocator.DestroyAll(); + + // We always return true as we have no way to track whether the final order + // differs from the original order. + return true; +} + +namespace { +/// \brief A pass to compute block placement statistics. +/// +/// A separate pass to compute interesting statistics for evaluating block +/// placement. This is separate from the actual placement pass so that they can +/// be computed in the absence of any placement transformations or when using +/// alternative placement strategies. +class MachineBlockPlacementStats : public MachineFunctionPass { + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. + const MachineBlockFrequencyInfo *MBFI; + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacementStats() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char MachineBlockPlacementStats::ID = 0; +char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID; +INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) + +bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (llvm::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BlockFrequency BlockFreq = MBFI->getBlockFreq(I); + Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches + : NumUncondBranches; + Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq + : UncondBranchTakenFreq; + for (MachineBasicBlock::succ_iterator SI = I->succ_begin(), + SE = I->succ_end(); + SI != SE; ++SI) { + // Skip if this successor is a fallthrough. 
+ if (I->isLayoutSuccessor(*SI)) + continue; + + BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI); + ++NumBranches; + BranchTakenFreq += EdgeFreq.getFrequency(); + } + } + + return false; +} + diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp new file mode 100644 index 0000000..ae70912 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -0,0 +1,126 @@ +//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This analysis uses probability info stored in Machine Basic Blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) +INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) + +char MachineBranchProbabilityInfo::ID = 0; + +void MachineBranchProbabilityInfo::anchor() { } + +uint32_t MachineBranchProbabilityInfo:: +getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { + // First we compute the sum with 64-bits of precision, ensuring that cannot + // overflow by bounding the number of weights considered. Hopefully no one + // actually needs 2^32 successors. + assert(MBB->succ_size() < UINT32_MAX); + uint64_t Sum = 0; + Scale = 1; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + uint32_t Weight = getEdgeWeight(MBB, I); + Sum += Weight; + } + + // If the computed sum fits in 32-bits, we're done. + if (Sum <= UINT32_MAX) + return Sum; + + // Otherwise, compute the scale necessary to cause the weights to fit, and + // re-sum with that scale applied. + assert((Sum / UINT32_MAX) < UINT32_MAX); + Scale = (Sum / UINT32_MAX) + 1; + Sum = 0; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + uint32_t Weight = getEdgeWeight(MBB, I); + Sum += Weight / Scale; + } + assert(Sum <= UINT32_MAX); + return Sum; +} + +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { + uint32_t Weight = Src->getSuccWeight(Dst); + if (!Weight) + return DEFAULT_WEIGHT; + return Weight; +} + +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + // This is a linear search. Try to use the const_succ_iterator version when + // possible. + return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); +} + +bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + // Hot probability is at least 4/5 = 80% + // FIXME: Compare against a static "hot" BranchProbability. 
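+  // For example, successor weights {8, 2} give the hotter edge probability
+  // 8/10, which is not strictly greater than 4/5, so it is not hot; weights
+  // {9, 1} give 9/10, which is.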
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); +} + +MachineBasicBlock * +MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { + uint32_t MaxWeight = 0; + MachineBasicBlock *MaxSucc = 0; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + uint32_t Weight = getEdgeWeight(MBB, I); + if (Weight > MaxWeight) { + MaxWeight = Weight; + MaxSucc = *I; + } + } + + if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) + return MaxSucc; + + return 0; +} + +BranchProbability +MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + uint32_t Scale = 1; + uint32_t D = getSumForBlock(Src, Scale); + uint32_t N = getEdgeWeight(Src, Dst) / Scale; + + return BranchProbability(N, D); +} + +raw_ostream &MachineBranchProbabilityInfo:: +printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + + const BranchProbability Prob = getEdgeProbability(Src, Dst); + OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber() + << " probability is " << Prob + << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp new file mode 100644 index 0000000..61d8d38 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -0,0 +1,661 @@ +//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs global common subexpression elimination on machine +// instructions using a scoped hash table based value numbering scheme. It +// must be run while the machine function is still in SSA form. 
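+//
+// As an illustrative sketch (simplified, with made-up register numbers):
+// instructions are hashed by opcode and operands while walking the dominator
+// tree, with one hash-table scope pushed per block. A later
+//   %vreg2 = ADD %vreg0, %vreg1
+// that matches a dominating instance is eliminated by reusing the dominating
+// def, and popping a scope discards its entries on the way back up the tree.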
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-cse" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/RecyclingAllocator.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +STATISTIC(NumCoalesces, "Number of copies coalesced"); +STATISTIC(NumCSEs, "Number of common subexpression eliminated"); +STATISTIC(NumPhysCSEs, + "Number of physreg referencing common subexpr eliminated"); +STATISTIC(NumCrossBBCSEs, + "Number of cross-MBB physreg referencing CS eliminated"); +STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); + +namespace { + class MachineCSE : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + AliasAnalysis *AA; + MachineDominatorTree *DT; + MachineRegisterInfo *MRI; + public: + static char ID; // Pass identification + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) { + initializeMachineCSEPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + + virtual void releaseMemory() { + ScopeMap.clear(); + Exps.clear(); + } + + private: + const unsigned LookAheadLimit; + typedef RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy; + typedef ScopedHashTable<MachineInstr*, unsigned, + MachineInstrExpressionTrait, AllocatorTy> ScopedHTType; + typedef ScopedHTType::ScopeTy ScopeType; + DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; + ScopedHTType VNT; + SmallVector<MachineInstr*, 64> Exps; + unsigned CurrVN; + + bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); + bool isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) const; + bool hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs, + bool &PhysUseDef) const; + bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs, + bool &NonLocal) const; + bool isCSECandidate(MachineInstr *MI); + bool isProfitableToCSE(unsigned CSReg, unsigned Reg, + MachineInstr *CSMI, MachineInstr *MI); + void EnterScope(MachineBasicBlock *MBB); + void ExitScope(MachineBasicBlock *MBB); + bool ProcessBlock(MachineBasicBlock *MBB); + void ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren); + bool PerformCSE(MachineDomTreeNode *Node); + }; +} // end anonymous namespace + +char MachineCSE::ID = 0; +char &llvm::MachineCSEID = MachineCSE::ID; +INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MachineCSE, 
"machine-cse", + "Machine Common Subexpression Elimination", false, false) + +bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, + MachineBasicBlock *MBB) { + bool Changed = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (!MRI->hasOneNonDBGUse(Reg)) + // Only coalesce single use copies. This ensure the copy will be + // deleted. + continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (!DefMI->isCopy()) + continue; + unsigned SrcReg = DefMI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + continue; + if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) + continue; + if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) + continue; + DEBUG(dbgs() << "Coalescing: " << *DefMI); + DEBUG(dbgs() << "*** to: " << *MI); + MO.setReg(SrcReg); + MRI->clearKillFlags(SrcReg); + DefMI->eraseFromParent(); + ++NumCoalesces; + Changed = true; + } + + return Changed; +} + +bool +MachineCSE::isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) const { + unsigned LookAheadLeft = LookAheadLimit; + while (LookAheadLeft) { + // Skip over dbg_value's. + while (I != E && I->isDebugValue()) + ++I; + + if (I == E) + // Reached end of block, register is obviously dead. + return true; + + bool SeenDef = false; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I->getOperand(i); + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + SeenDef = true; + if (!MO.isReg() || !MO.getReg()) + continue; + if (!TRI->regsOverlap(MO.getReg(), Reg)) + continue; + if (MO.isUse()) + // Found a use! + return false; + SeenDef = true; + } + if (SeenDef) + // See a def of Reg (or an alias) before encountering any use, it's + // trivially dead. + return true; + + --LookAheadLeft; + ++I; + } + return false; +} + +/// hasLivePhysRegDefUses - Return true if the specified instruction read/write +/// physical registers (except for dead defs of physical registers). It also +/// returns the physical register def by reference if it's the only one and the +/// instruction does not uses a physical register. +bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet<unsigned,8> &PhysRefs, + SmallVector<unsigned,2> &PhysDefs, + bool &PhysUseDef) const{ + // First, add all uses to PhysRefs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); + } + + // Next, collect all defs into PhysDefs. If any is already in PhysRefs + // (which currently contains only uses), set the PhysUseDef flag. 
+  PhysUseDef = false;
+  MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    // Check against PhysRefs even if the def is "dead".
+    if (PhysRefs.count(Reg))
+      PhysUseDef = true;
+    // If the def is dead, it's ok. But the def may not be marked "dead".
+    // That's common since this pass is run before livevariables. We can scan
+    // forward a few instructions and check if it is obviously dead.
+    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
+      PhysDefs.push_back(Reg);
+  }
+
+  // Finally, add all defs to PhysRefs as well.
+  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
+    for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+      PhysRefs.insert(*AI);
+
+  return !PhysRefs.empty();
+}
+
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+                                  SmallSet<unsigned,8> &PhysRefs,
+                                  SmallVector<unsigned,2> &PhysDefs,
+                                  bool &NonLocal) const {
+  // For now conservatively returns false if the common subexpression is
+  // not in the same basic block as the given instruction. The only exception
+  // is if the common subexpression is in the sole predecessor block.
+  const MachineBasicBlock *MBB = MI->getParent();
+  const MachineBasicBlock *CSMBB = CSMI->getParent();
+
+  bool CrossMBB = false;
+  if (CSMBB != MBB) {
+    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
+      return false;
+
+    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
+      if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
+        // Avoid extending live range of physical registers if they are
+        // allocatable or reserved.
+        return false;
+    }
+    CrossMBB = true;
+  }
+  MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
+  MachineBasicBlock::const_iterator E = MI;
+  MachineBasicBlock::const_iterator EE = CSMBB->end();
+  unsigned LookAheadLeft = LookAheadLimit;
+  while (LookAheadLeft) {
+    // Skip over dbg_value's.
+    while (I != E && I != EE && I->isDebugValue())
+      ++I;
+
+    if (I == EE) {
+      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
+      (void)CrossMBB;
+      CrossMBB = false;
+      NonLocal = true;
+      I = MBB->begin();
+      EE = MBB->end();
+      continue;
+    }
+
+    if (I == E)
+      return true;
+
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = I->getOperand(i);
+      // RegMasks go on instructions like calls that clobber lots of physregs.
+      // Don't attempt to CSE across such an instruction.
+      if (MO.isRegMask())
+        return false;
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+      unsigned MOReg = MO.getReg();
+      if (TargetRegisterInfo::isVirtualRegister(MOReg))
+        continue;
+      if (PhysRefs.count(MOReg))
+        return false;
+    }
+
+    --LookAheadLeft;
+    ++I;
+  }
+
+  return false;
+}
+
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+  if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+      MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
+    return false;
+
+  // Ignore copies.
+  if (MI->isCopyLike())
+    return false;
+
+  // Ignore stuff that we obviously can't move.
+  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
+      MI->hasUnmodeledSideEffects())
+    return false;
+
+  if (MI->mayLoad()) {
+    // Okay, this instruction does a load. As a refinement, we allow the target
+    // to decide whether the loaded value is actually a constant.
If so, we can
+    // actually use it as a load.
+    if (!MI->isInvariantLoad(AA))
+      // FIXME: we should be able to hoist loads with no other side effects if
+      // there are no other instructions which can change memory in this loop.
+      // This is a trivial form of alias analysis.
+      return false;
+  }
+  return true;
+}
+
+/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
+/// common expression that defines Reg.
+bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
+                                   MachineInstr *CSMI, MachineInstr *MI) {
+  // FIXME: Heuristics that work around the lack of live range splitting.
+
+  // If CSReg is used at all uses of Reg, CSE should not increase register
+  // pressure of CSReg.
+  bool MayIncreasePressure = true;
+  if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
+      TargetRegisterInfo::isVirtualRegister(Reg)) {
+    MayIncreasePressure = false;
+    SmallPtrSet<MachineInstr*, 8> CSUses;
+    for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg),
+         E = MRI->use_nodbg_end(); I != E; ++I) {
+      MachineInstr *Use = &*I;
+      CSUses.insert(Use);
+    }
+    for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+         E = MRI->use_nodbg_end(); I != E; ++I) {
+      MachineInstr *Use = &*I;
+      if (!CSUses.count(Use)) {
+        MayIncreasePressure = true;
+        break;
+      }
+    }
+  }
+  if (!MayIncreasePressure) return true;
+
+  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
+  // an immediate predecessor. We don't want to increase register pressure and
+  // end up causing other computation to be spilled.
+  if (MI->isAsCheapAsAMove()) {
+    MachineBasicBlock *CSBB = CSMI->getParent();
+    MachineBasicBlock *BB = MI->getParent();
+    if (CSBB != BB && !CSBB->isSuccessor(BB))
+      return false;
+  }
+
+  // Heuristics #2: If the expression doesn't use a virtual register and the
+  // only uses of the redundant computation are copies, do not CSE.
+  bool HasVRegUse = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isUse() &&
+        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+      HasVRegUse = true;
+      break;
+    }
+  }
+  if (!HasVRegUse) {
+    bool HasNonCopyUse = false;
+    for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+         E = MRI->use_nodbg_end(); I != E; ++I) {
+      MachineInstr *Use = &*I;
+      // Ignore copies.
+      if (!Use->isCopyLike()) {
+        HasNonCopyUse = true;
+        break;
+      }
+    }
+    if (!HasNonCopyUse)
+      return false;
+  }
+
+  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
+  // it unless the defined value is already used in the BB of the new use.
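+  // (Illustration: if CSReg's only other use is a PHI in a different block,
+  // reusing it for MI would extend CSReg's live range into MI's block; the
+  // check below permits that only when CSReg is already used there.)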
+ bool HasPHI = false; + SmallPtrSet<MachineBasicBlock*, 4> CSBBs; + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + HasPHI |= Use->isPHI(); + CSBBs.insert(Use->getParent()); + } + + if (!HasPHI) + return true; + return CSBBs.count(MI->getParent()); +} + +void MachineCSE::EnterScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); + ScopeType *Scope = new ScopeType(VNT); + ScopeMap[MBB] = Scope; +} + +void MachineCSE::ExitScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); + DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); + assert(SI != ScopeMap.end()); + delete SI->second; + ScopeMap.erase(SI); +} + +bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { + bool Changed = false; + + SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; + SmallVector<unsigned, 2> ImplicitDefsToUpdate; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + + if (!isCSECandidate(MI)) + continue; + + bool FoundCSE = VNT.count(MI); + if (!FoundCSE) { + // Look for trivial copy coalescing opportunities. + if (PerformTrivialCoalescing(MI, MBB)) { + Changed = true; + + // After coalescing MI itself may become a copy. + if (MI->isCopyLike()) + continue; + FoundCSE = VNT.count(MI); + } + } + + // Commute commutable instructions. + bool Commuted = false; + if (!FoundCSE && MI->isCommutable()) { + MachineInstr *NewMI = TII->commuteInstruction(MI); + if (NewMI) { + Commuted = true; + FoundCSE = VNT.count(NewMI); + if (NewMI != MI) { + // New instruction. It doesn't need to be kept. + NewMI->eraseFromParent(); + Changed = true; + } else if (!FoundCSE) + // MI was changed but it didn't help, commute it back! + (void)TII->commuteInstruction(MI); + } + } + + // If the instruction defines physical registers and the values *may* be + // used, then it's not safe to replace it with a common subexpression. + // It's also not safe if the instruction uses physical registers. + bool CrossMBBPhysDef = false; + SmallSet<unsigned, 8> PhysRefs; + SmallVector<unsigned, 2> PhysDefs; + bool PhysUseDef = false; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, + PhysDefs, PhysUseDef)) { + FoundCSE = false; + + // ... Unless the CS is local or is in the sole predecessor block + // and it also defines the physical register which is not clobbered + // in between and the physical register uses were not clobbered. + // This can never be the case if the instruction both uses and + // defines the same physical register, which was detected above. + if (!PhysUseDef) { + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) + FoundCSE = true; + } + } + + if (!FoundCSE) { + VNT.insert(MI, CurrVN++); + Exps.push_back(MI); + continue; + } + + // Found a common subexpression, eliminate it. + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + DEBUG(dbgs() << "Examining: " << *MI); + DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); + + // Check if it's profitable to perform this CSE. 
+    bool DoCSE = true;
+    unsigned NumDefs = MI->getDesc().getNumDefs() +
+                       MI->getDesc().getNumImplicitDefs();
+
+    for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+      unsigned OldReg = MO.getReg();
+      unsigned NewReg = CSMI->getOperand(i).getReg();
+
+      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+      // we should make sure it is not dead at CSMI.
+      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
+        ImplicitDefsToUpdate.push_back(i);
+      if (OldReg == NewReg) {
+        --NumDefs;
+        continue;
+      }
+
+      assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
+             TargetRegisterInfo::isVirtualRegister(NewReg) &&
+             "Do not CSE physical register defs!");
+
+      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+        DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
+        DoCSE = false;
+        break;
+      }
+
+      // Don't perform CSE if the result of the old instruction cannot exist
+      // within the register class of the new instruction.
+      const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
+      if (!MRI->constrainRegClass(NewReg, OldRC)) {
+        DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
+        DoCSE = false;
+        break;
+      }
+
+      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
+      --NumDefs;
+    }
+
+    // Actually perform the elimination.
+    if (DoCSE) {
+      for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
+        MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
+        MRI->clearKillFlags(CSEPairs[i].second);
+      }
+
+      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+      // we should make sure it is not dead at CSMI.
+      for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
+        CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+
+      if (CrossMBBPhysDef) {
+        // Add physical register defs now coming in from a predecessor to MBB
+        // livein list.
+        while (!PhysDefs.empty()) {
+          unsigned LiveIn = PhysDefs.pop_back_val();
+          if (!MBB->isLiveIn(LiveIn))
+            MBB->addLiveIn(LiveIn);
+        }
+        ++NumCrossBBCSEs;
+      }
+
+      MI->eraseFromParent();
+      ++NumCSEs;
+      if (!PhysRefs.empty())
+        ++NumPhysCSEs;
+      if (Commuted)
+        ++NumCommutes;
+      Changed = true;
+    } else {
+      VNT.insert(MI, CurrVN++);
+      Exps.push_back(MI);
+    }
+    CSEPairs.clear();
+    ImplicitDefsToUpdate.clear();
+  }
+
+  return Changed;
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+                DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
+  if (OpenChildren[Node])
+    return;
+
+  // Pop scope.
+  ExitScope(Node->getBlock());
+
+  // Now traverse upwards to pop ancestors whose offspring are all done.
+  while (MachineDomTreeNode *Parent = Node->getIDom()) {
+    unsigned Left = --OpenChildren[Parent];
+    if (Left != 0)
+      break;
+    ExitScope(Parent->getBlock());
+    Node = Parent;
+  }
+}
+
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+  SmallVector<MachineDomTreeNode*, 32> Scopes;
+  SmallVector<MachineDomTreeNode*, 8> WorkList;
+  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+  CurrVN = 0;
+
+  // Perform a DFS walk to determine the order of visit.
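+  // For a small illustrative tree (shape made up): a root with children
+  // {A, B}, where A has one child C, ends up with OpenChildren == {root: 2,
+  // A: 1, B: 0, C: 0}; ExitScopeIfDone pops a node's scope once its count
+  // reaches zero, then decrements its parent's count and walks upward.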
+ WorkList.push_back(Node); + do { + Node = WorkList.pop_back_val(); + Scopes.push_back(Node); + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + unsigned NumChildren = Children.size(); + OpenChildren[Node] = NumChildren; + for (unsigned i = 0; i != NumChildren; ++i) { + MachineDomTreeNode *Child = Children[i]; + WorkList.push_back(Child); + } + } while (!WorkList.empty()); + + // Now perform CSE. + bool Changed = false; + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { + MachineDomTreeNode *Node = Scopes[i]; + MachineBasicBlock *MBB = Node->getBlock(); + EnterScope(MBB); + Changed |= ProcessBlock(MBB); + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. + ExitScopeIfDone(Node, OpenChildren); + } + + return Changed; +} + +bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + AA = &getAnalysis<AliasAnalysis>(); + DT = &getAnalysis<MachineDominatorTree>(); + return PerformCSE(DT->getRootNode()); +} diff --git a/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp new file mode 100644 index 0000000..81b4978 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp @@ -0,0 +1,14 @@ +//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineCodeEmitter.h" + +using namespace llvm; + +void MachineCodeEmitter::anchor() { } diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp new file mode 100644 index 0000000..dc8a224 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -0,0 +1,334 @@ +//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an extremely simple MachineInstr-level copy propagation pass. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-cp"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead copies deleted");
+
+namespace {
+  class MachineCopyPropagation : public MachineFunctionPass {
+    const TargetRegisterInfo *TRI;
+    const TargetInstrInfo *TII;
+    MachineRegisterInfo *MRI;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    MachineCopyPropagation() : MachineFunctionPass(ID) {
+      initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  private:
+    typedef SmallVector<unsigned, 4> DestList;
+    typedef DenseMap<unsigned, DestList> SourceMap;
+
+    void SourceNoLongerAvailable(unsigned Reg,
+                                 SourceMap &SrcMap,
+                              DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
+    bool CopyPropagateBlock(MachineBasicBlock &MBB);
+    void removeCopy(MachineInstr *MI);
+  };
+}
+char MachineCopyPropagation::ID = 0;
+char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
+
+INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+                "Machine Copy Propagation Pass", false, false)
+
+void
+MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
+                              SourceMap &SrcMap,
+                              DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
+  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+    SourceMap::iterator SI = SrcMap.find(*AI);
+    if (SI != SrcMap.end()) {
+      const DestList& Defs = SI->second;
+      for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
+           I != E; ++I) {
+        unsigned MappedDef = *I;
+        // Source of copy is no longer available for propagation.
+        if (AvailCopyMap.erase(MappedDef)) {
+          for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
+            AvailCopyMap.erase(*SR);
+        }
+      }
+    }
+  }
+}
+
+static bool NoInterveningSideEffect(const MachineInstr *CopyMI,
+                                    const MachineInstr *MI) {
+  const MachineBasicBlock *MBB = CopyMI->getParent();
+  if (MI->getParent() != MBB)
+    return false;
+  MachineBasicBlock::const_iterator I = CopyMI;
+  MachineBasicBlock::const_iterator E = MBB->end();
+  MachineBasicBlock::const_iterator E2 = MI;
+
+  ++I;
+  while (I != E && I != E2) {
+    if (I->hasUnmodeledSideEffects() || I->isCall() ||
+        I->isTerminator())
+      return false;
+    ++I;
+  }
+  return true;
+}
+
+/// isNopCopy - Return true if the specified copy is really a nop. That is
+/// if the source of the copy is the same as the definition of the copy that
+/// supplied the source. If the source of the copy is a sub-register then it
+/// must check that the sub-indices match. e.g.
+/// ecx = mov eax +/// al = mov cl +/// But not +/// ecx = mov eax +/// al = mov ch +static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, + const TargetRegisterInfo *TRI) { + unsigned SrcSrc = CopyMI->getOperand(1).getReg(); + if (Def == SrcSrc) + return true; + if (TRI->isSubRegister(SrcSrc, Def)) { + unsigned SrcDef = CopyMI->getOperand(0).getReg(); + unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def); + if (!SubIdx) + return false; + return SubIdx == TRI->getSubRegIndex(SrcDef, Src); + } + + return false; +} + +// Remove MI from the function because it has been determined it is dead. +// Turn it into a noop KILL instruction if it has super-register liveness +// adjustments. +void MachineCopyPropagation::removeCopy(MachineInstr *MI) { + if (MI->getNumOperands() == 2) + MI->eraseFromParent(); + else + MI->setDesc(TII->get(TargetOpcode::KILL)); +} + +bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { + SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion + DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map + DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map + SourceMap SrcMap; // Src -> Def map + + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + + if (MI->isCopy()) { + unsigned Def = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Def) || + TargetRegisterInfo::isVirtualRegister(Src)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src); + if (CI != AvailCopyMap.end()) { + MachineInstr *CopyMI = CI->second; + if (!MRI->isReserved(Def) && + (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) && + isNopCopy(CopyMI, Def, Src, TRI)) { + // The two copies cancel out and the source of the first copy + // hasn't been overridden, eliminate the second one. e.g. + // %ECX<def> = COPY %EAX<kill> + // ... nothing clobbered EAX. + // %EAX<def> = COPY %ECX + // => + // %ECX<def> = COPY %EAX + // + // Also avoid eliminating a copy from reserved registers unless the + // definition is proven not clobbered. e.g. + // %RSP<def> = COPY %RAX + // CALL + // %RAX<def> = COPY %RSP + + // Clear any kills of Def between CopyMI and MI. This extends the + // live range. + for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) + I->clearRegisterKills(Def, TRI); + + removeCopy(MI); + Changed = true; + ++NumDeletes; + continue; + } + } + + // If Src is defined by a previous copy, it cannot be eliminated. + for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) { + CI = CopyMap.find(*AI); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + } + + // Copy is now a candidate for deletion. + MaybeDeadCopies.insert(MI); + + // If 'Src' is previously source of another copy, then this earlier copy's + // source is no longer available. e.g. + // %xmm9<def> = copy %xmm2 + // ... + // %xmm2<def> = copy %xmm0 + // ... + // %xmm2<def> = copy %xmm9 + SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap); + + // Remember Def is defined by the copy. + // ... Make sure to clear the def maps of aliases first. 
+      for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) {
+        CopyMap.erase(*AI);
+        AvailCopyMap.erase(*AI);
+      }
+      CopyMap[Def] = MI;
+      AvailCopyMap[Def] = MI;
+      for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) {
+        CopyMap[*SR] = MI;
+        AvailCopyMap[*SR] = MI;
+      }
+
+      // Remember the source that's copied to Def. Once it's clobbered, it's
+      // no longer available for copy propagation.
+      if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) ==
+          SrcMap[Src].end()) {
+        SrcMap[Src].push_back(Def);
+      }
+
+      continue;
+    }
+
+    // Not a copy.
+    SmallVector<unsigned, 2> Defs;
+    int RegMaskOpNum = -1;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isRegMask())
+        RegMaskOpNum = i;
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+
+      if (TargetRegisterInfo::isVirtualRegister(Reg))
+        report_fatal_error("MachineCopyPropagation should be run after"
+                           " register allocation!");
+
+      if (MO.isDef()) {
+        Defs.push_back(Reg);
+        continue;
+      }
+
+      // If 'Reg' is defined by a copy, the copy is no longer a candidate
+      // for elimination.
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+        DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI);
+        if (CI != CopyMap.end())
+          MaybeDeadCopies.remove(CI->second);
+      }
+    }
+
+    // The instruction has a register mask operand which means that it clobbers
+    // a large set of registers. It is possible to use the register mask to
+    // prune the available copies, but treat it like a basic block boundary for
+    // now.
+    if (RegMaskOpNum >= 0) {
+      // Erase any MaybeDeadCopies whose destination register is clobbered.
+      const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum);
+      for (SmallSetVector<MachineInstr*, 8>::iterator
+           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+           DI != DE; ++DI) {
+        unsigned Reg = (*DI)->getOperand(0).getReg();
+        if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
+          continue;
+        removeCopy(*DI);
+        Changed = true;
+        ++NumDeletes;
+      }
+
+      // Clear all data structures as if we were beginning a new basic block.
+      MaybeDeadCopies.clear();
+      AvailCopyMap.clear();
+      CopyMap.clear();
+      SrcMap.clear();
+      continue;
+    }
+
+    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+      unsigned Reg = Defs[i];
+
+      // No longer defined by a copy.
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+        CopyMap.erase(*AI);
+        AvailCopyMap.erase(*AI);
+      }
+
+      // If 'Reg' was previously the source of a copy, it is no longer
+      // available for copy propagation.
+      SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
+    }
+  }
+
+  // If MBB doesn't have successors, delete the copies whose defs are not used.
+  // If MBB does have successors, then conservatively assume the defs are
+  // live-out since we don't want to trust live-in lists.
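+  // (Illustration in the spirit of the examples above: a trailing
+  //   %EAX<def> = COPY %EBX
+  // whose def is never read again is deleted here when the block has no
+  // successors; with successors present it survives, since a successor might
+  // read %EAX even if its live-in list omits it.)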
+ if (MBB.succ_empty()) { + for (SmallSetVector<MachineInstr*, 8>::iterator + DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); + DI != DE; ++DI) { + if (!MRI->isReserved((*DI)->getOperand(0).getReg())) { + removeCopy(*DI); + Changed = true; + ++NumDeletes; + } + } + } + + return Changed; +} + +bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + MRI = &MF.getRegInfo(); + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= CopyPropagateBlock(*I); + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp new file mode 100644 index 0000000..04c8ecb --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp @@ -0,0 +1,59 @@ +//===- MachineDominators.cpp - Machine Dominator Calculation --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements simple dominator construction algorithms for finding +// forward dominators on machine functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +namespace llvm { +TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>); +TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>); +} + +char MachineDominatorTree::ID = 0; + +INITIALIZE_PASS(MachineDominatorTree, "machinedomtree", + "MachineDominator Tree Construction", true, true) + +char &llvm::MachineDominatorsID = MachineDominatorTree::ID; + +void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { + DT->recalculate(F); + + return false; +} + +MachineDominatorTree::MachineDominatorTree() + : MachineFunctionPass(ID) { + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + DT = new DominatorTreeBase<MachineBasicBlock>(false); +} + +MachineDominatorTree::~MachineDominatorTree() { + delete DT; +} + +void MachineDominatorTree::releaseMemory() { + DT->releaseMemory(); +} + +void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { + DT->print(OS); +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp new file mode 100644 index 0000000..04321f3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -0,0 +1,898 @@ +//===-- MachineFunction.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect native machine code information for a function. This allows +// target-specific information about the generated code to be stored with each +// function. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MachineFunction implementation +//===----------------------------------------------------------------------===// + +// Out of line virtual method. +MachineFunctionInfo::~MachineFunctionInfo() {} + +void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { + MBB->getParent()->DeleteMachineBasicBlock(MBB); +} + +MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, + unsigned FunctionNum, MachineModuleInfo &mmi, + GCModuleInfo* gmi) + : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) { + if (TM.getRegisterInfo()) + RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); + else + RegInfo = 0; + MFInfo = 0; + FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(), + TM.Options.RealignStack); + if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)) + FrameInfo->ensureMaxAlignment(Fn->getAttributes(). + getStackAlignment(AttributeSet::FunctionIndex)); + ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout()); + Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); + // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. + if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize)) + Alignment = std::max(Alignment, + TM.getTargetLowering()->getPrefFunctionAlignment()); + FunctionNumber = FunctionNum; + JumpTableInfo = 0; +} + +MachineFunction::~MachineFunction() { + // Don't call destructors on MachineInstr and MachineOperand. All of their + // memory comes from the BumpPtrAllocator which is about to be purged. + // + // Do call MachineBasicBlock destructors, it contains std::vectors. 
+  for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I))
+    I->Insts.clearAndLeakNodesUnsafely();
+
+  InstructionRecycler.clear(Allocator);
+  OperandRecycler.clear(Allocator);
+  BasicBlockRecycler.clear(Allocator);
+  if (RegInfo) {
+    RegInfo->~MachineRegisterInfo();
+    Allocator.Deallocate(RegInfo);
+  }
+  if (MFInfo) {
+    MFInfo->~MachineFunctionInfo();
+    Allocator.Deallocate(MFInfo);
+  }
+
+  FrameInfo->~MachineFrameInfo();
+  Allocator.Deallocate(FrameInfo);
+
+  ConstantPool->~MachineConstantPool();
+  Allocator.Deallocate(ConstantPool);
+
+  if (JumpTableInfo) {
+    JumpTableInfo->~MachineJumpTableInfo();
+    Allocator.Deallocate(JumpTableInfo);
+  }
+}
+
+/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it
+/// does not already exist, allocate one.
+MachineJumpTableInfo *MachineFunction::
+getOrCreateJumpTableInfo(unsigned EntryKind) {
+  if (JumpTableInfo) return JumpTableInfo;
+
+  JumpTableInfo = new (Allocator)
+    MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
+  return JumpTableInfo;
+}
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+  if (empty()) { MBBNumbering.clear(); return; }
+  MachineFunction::iterator MBBI, E = end();
+  if (MBB == 0)
+    MBBI = begin();
+  else
+    MBBI = MBB;
+
+  // Figure out the block number this should have.
+  unsigned BlockNo = 0;
+  if (MBBI != begin())
+    BlockNo = prior(MBBI)->getNumber()+1;
+
+  for (; MBBI != E; ++MBBI, ++BlockNo) {
+    if (MBBI->getNumber() != (int)BlockNo) {
+      // Remove use of the old number.
+      if (MBBI->getNumber() != -1) {
+        assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+               "MBB number mismatch!");
+        MBBNumbering[MBBI->getNumber()] = 0;
+      }
+
+      // If BlockNo is already taken, set that block's number to -1.
+      if (MBBNumbering[BlockNo])
+        MBBNumbering[BlockNo]->setNumber(-1);
+
+      MBBNumbering[BlockNo] = MBBI;
+      MBBI->setNumber(BlockNo);
+    }
+  }
+
+  // Okay, all the blocks are renumbered. If we have compactified the block
+  // numbering, shrink MBBNumbering now.
+  assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+  MBBNumbering.resize(BlockNo);
+}
+
+/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
+                                    DebugLoc DL, bool NoImp) {
+  return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+    MachineInstr(*this, MCID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+  return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+    MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+/// This function also serves as the MachineInstr destructor - the real
+/// ~MachineInstr() destructor must be empty.
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+  // Strip it for parts. The operand array and the MI object itself are
+  // independently recyclable.
+  if (MI->Operands)
+    deallocateOperandArray(MI->CapOperands, MI->Operands);
+  // Don't call ~MachineInstr() which must be trivial anyway because
+  // ~MachineFunction drops whole lists of MachineInstrs without calling their
+  // destructors.
+  InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+  return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+             MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+  assert(MBB->getParent() == this && "MBB parent mismatch!");
+  MBB->~MachineBasicBlock();
+  BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
+                                      uint64_t s, unsigned base_alignment,
+                                      const MDNode *TBAAInfo,
+                                      const MDNode *Ranges) {
+  return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
+                                           TBAAInfo, Ranges);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+                                      int64_t Offset, uint64_t Size) {
+  return new (Allocator)
+             MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+                                                  MMO->getOffset()+Offset),
+                               MMO->getFlags(), Size,
+                               MMO->getBaseAlignment(), 0);
+}
+
+MachineInstr::mmo_iterator
+MachineFunction::allocateMemRefsArray(unsigned long Num) {
+  return Allocator.Allocate<MachineMemOperand *>(Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+                                    MachineInstr::mmo_iterator End) {
+  // Count the number of load mem refs.
+  unsigned Num = 0;
+  for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+    if ((*I)->isLoad())
+      ++Num;
+
+  // Allocate a new array and populate it with the load information.
+  MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+  unsigned Index = 0;
+  for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+    if ((*I)->isLoad()) {
+      if (!(*I)->isStore())
+        // Reuse the MMO.
+        Result[Index] = *I;
+      else {
+        // Clone the MMO and unset the store flag.
+        MachineMemOperand *JustLoad =
+          getMachineMemOperand((*I)->getPointerInfo(),
+                               (*I)->getFlags() & ~MachineMemOperand::MOStore,
+                               (*I)->getSize(), (*I)->getBaseAlignment(),
+                               (*I)->getTBAAInfo());
+        Result[Index] = JustLoad;
+      }
+      ++Index;
+    }
+  }
+  return std::make_pair(Result, Result + Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+                                     MachineInstr::mmo_iterator End) {
+  // Count the number of store mem refs.
+  unsigned Num = 0;
+  for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+    if ((*I)->isStore())
+      ++Num;
+
+  // Allocate a new array and populate it with the store information.
+  MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+  unsigned Index = 0;
+  for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+    if ((*I)->isStore()) {
+      if (!(*I)->isLoad())
+        // Reuse the MMO.
+        Result[Index] = *I;
+      else {
+        // Clone the MMO and unset the load flag.
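+        // For example, an MMO with flags (MOLoad|MOStore|MOVolatile) becomes
+        // (MOStore|MOVolatile) here; pointer info, size, alignment and TBAA
+        // info carry over unchanged.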
+ MachineMemOperand *JustStore = + getMachineMemOperand((*I)->getPointerInfo(), + (*I)->getFlags() & ~MachineMemOperand::MOLoad, + (*I)->getSize(), (*I)->getBaseAlignment(), + (*I)->getTBAAInfo()); + Result[Index] = JustStore; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineFunction::dump() const { + print(dbgs()); +} +#endif + +StringRef MachineFunction::getName() const { + assert(getFunction() && "No function!"); + return getFunction()->getName(); +} + +void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { + OS << "# Machine code for function " << getName() << ": "; + if (RegInfo) { + OS << (RegInfo->isSSA() ? "SSA" : "Post SSA"); + if (!RegInfo->tracksLiveness()) + OS << ", not tracking liveness"; + } + OS << '\n'; + + // Print Frame Information + FrameInfo->print(*this, OS); + + // Print JumpTable Information + if (JumpTableInfo) + JumpTableInfo->print(OS); + + // Print Constant Pool + ConstantPool->print(OS); + + const TargetRegisterInfo *TRI = getTarget().getRegisterInfo(); + + if (RegInfo && !RegInfo->livein_empty()) { + OS << "Function Live Ins: "; + for (MachineRegisterInfo::livein_iterator + I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) { + OS << PrintReg(I->first, TRI); + if (I->second) + OS << " in " << PrintReg(I->second, TRI); + if (llvm::next(I) != E) + OS << ", "; + } + OS << '\n'; + } + + for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { + OS << '\n'; + BB->print(OS, Indexes); + } + + OS << "\n# End machine code for function " << getName() << ".\n\n"; +} + +namespace llvm { + template<> + struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const MachineFunction *F) { + return "CFG for '" + F->getName().str() + "' function"; + } + + std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineFunction *Graph) { + std::string OutStr; + { + raw_string_ostream OSS(OutStr); + + if (isSimple()) { + OSS << "BB#" << Node->getNumber(); + if (const BasicBlock *BB = Node->getBasicBlock()) + OSS << ": " << BB->getName(); + } else + Node->print(OSS); + } + + if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); + + // Process string output to make it nicer... + for (unsigned i = 0; i != OutStr.length(); ++i) + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin()+i+1, 'l'); + } + return OutStr; + } + }; +} + +void MachineFunction::viewCFG() const +{ +#ifndef NDEBUG + ViewGraph(this, "mf" + getName()); +#else + errs() << "MachineFunction::viewCFG is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +void MachineFunction::viewCFGOnly() const +{ +#ifndef NDEBUG + ViewGraph(this, "mf" + getName(), true); +#else + errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// addLiveIn - Add the specified physical register as a live-in value and +/// create a corresponding virtual register for it. 
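+/// A typical call from target lowering code (sketch; the register and class
+/// names are illustrative):
+///   unsigned VReg = MF.addLiveIn(X86::EDI, &X86::GR32RegClass);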
+unsigned MachineFunction::addLiveIn(unsigned PReg, + const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI = getRegInfo(); + unsigned VReg = MRI.getLiveInVirtReg(PReg); + if (VReg) { + assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!"); + return VReg; + } + VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(PReg, VReg); + return VReg; +} + +/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. +/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a +/// normal 'L' label is returned. +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, + bool isLinkerPrivate) const { + assert(JumpTableInfo && "No jump tables"); + assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); + const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); + + const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() : + MAI.getPrivateGlobalPrefix(); + SmallString<60> Name; + raw_svector_ostream(Name) + << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; + return Ctx.GetOrCreateSymbol(Name.str()); +} + +/// getPICBaseSymbol - Return a function-local symbol to represent the PIC +/// base. +MCSymbol *MachineFunction::getPICBaseSymbol() const { + const MCAsmInfo &MAI = *Target.getMCAsmInfo(); + return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + Twine(getFunctionNumber())+"$pb"); +} + +//===----------------------------------------------------------------------===// +// MachineFrameInfo implementation +//===----------------------------------------------------------------------===// + +/// ensureMaxAlignment - Make sure the function is at least Align bytes +/// aligned. +void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { + if (!TFI.isStackRealignable() || !RealignOption) + assert(Align <= TFI.getStackAlignment() && + "For targets without stack realignment, Align is out of limit!"); + if (MaxAlignment < Align) MaxAlignment = Align; +} + +/// clampStackAlignment - Clamp the alignment if requested and emit a warning. +static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, + unsigned StackAlign) { + if (!ShouldClamp || Align <= StackAlign) + return Align; + DEBUG(dbgs() << "Warning: requested alignment " << Align + << " exceeds the stack alignment " << StackAlign + << " when stack realignment is off" << '\n'); + return StackAlign; +} + +/// CreateStackObject - Create a new statically sized stack object, returning +/// a nonnegative identifier to represent it. +/// +int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, + bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { + assert(Size != 0 && "Cannot allocate zero size stack objects!"); + Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, + Alignment, TFI.getStackAlignment()); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, + Alloca)); + int Index = (int)Objects.size() - NumFixedObjects - 1; + assert(Index >= 0 && "Bad frame index!"); + ensureMaxAlignment(Alignment); + return Index; +} + +/// CreateSpillStackObject - Create a new statically sized stack object that +/// represents a spill slot, returning a nonnegative identifier to represent +/// it. 
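+/// Sign convention: this and CreateStackObject() return indices >= 0, while
+/// CreateFixedObject() below returns negative indices (-1, -2, ...) for
+/// objects at fixed offsets from the incoming stack pointer.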
+/// +int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, + unsigned Alignment) { + Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, + Alignment, TFI.getStackAlignment()); + CreateStackObject(Size, Alignment, true, false); + int Index = (int)Objects.size() - NumFixedObjects - 1; + ensureMaxAlignment(Alignment); + return Index; +} + +/// CreateVariableSizedObject - Notify the MachineFrameInfo object that a +/// variable sized object has been created. This must be created whenever a +/// variable sized object is created, whether or not the index returned is +/// actually used. +/// +int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { + HasVarSizedObjects = true; + Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, + Alignment, TFI.getStackAlignment()); + Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); + ensureMaxAlignment(Alignment); + return (int)Objects.size()-NumFixedObjects-1; +} + +/// CreateFixedObject - Create a new object at a fixed location on the stack. +/// All fixed objects should be created before other objects are created for +/// efficiency. By default, fixed objects are immutable. This returns an +/// index with a negative value. +/// +int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, + bool Immutable) { + assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. + unsigned StackAlign = TFI.getStackAlignment(); + unsigned Align = MinAlign(SPOffset, StackAlign); + Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, + Align, TFI.getStackAlignment()); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/ false, + /*NeedSP*/ false, + /*Alloca*/ 0)); + return -++NumFixedObjects; +} + + +BitVector +MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { + assert(MBB && "MBB must be valid"); + const MachineFunction *MF = MBB->getParent(); + assert(MF && "MBB must be part of a MachineFunction"); + const TargetMachine &TM = MF->getTarget(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + BitVector BV(TRI->getNumRegs()); + + // Before CSI is calculated, no registers are considered pristine. They can be + // freely used and PEI will make sure they are saved. + if (!isCalleeSavedInfoValid()) + return BV; + + for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + BV.set(*CSR); + + // The entry MBB always has all CSRs pristine. + if (MBB == &MF->front()) + return BV; + + // On other MBBs the saved CSRs are not pristine. + const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo(); + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) + BV.reset(I->getReg()); + + return BV; +} + +unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + unsigned MaxAlign = getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. 
Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { + if (isDeadObjectIndex(i)) + continue; + Offset += getObjectSize(i); + unsigned Align = getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (adjustsStack() || hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} + +void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ + if (Objects.empty()) return; + + const TargetFrameLowering *FI = MF.getTarget().getFrameLowering(); + int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); + + OS << "Frame Objects:\n"; + + for (unsigned i = 0, e = Objects.size(); i != e; ++i) { + const StackObject &SO = Objects[i]; + OS << " fi#" << (int)(i-NumFixedObjects) << ": "; + if (SO.Size == ~0ULL) { + OS << "dead\n"; + continue; + } + if (SO.Size == 0) + OS << "variable sized"; + else + OS << "size=" << SO.Size; + OS << ", align=" << SO.Alignment; + + if (i < NumFixedObjects) + OS << ", fixed"; + if (i < NumFixedObjects || SO.SPOffset != -1) { + int64_t Off = SO.SPOffset - ValOffset; + OS << ", at location [SP"; + if (Off > 0) + OS << "+" << Off; + else if (Off < 0) + OS << Off; + OS << "]"; + } + OS << "\n"; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineFrameInfo::dump(const MachineFunction &MF) const { + print(MF, dbgs()); +} +#endif + +//===----------------------------------------------------------------------===// +// MachineJumpTableInfo implementation +//===----------------------------------------------------------------------===// + +/// getEntrySize - Return the size of each entry in the jump table. +unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { + // The size of a jump table entry is 4 bytes unless the entry is just the + // address of a block, in which case it is the pointer size. + switch (getEntryKind()) { + case MachineJumpTableInfo::EK_BlockAddress: + return TD.getPointerSize(); + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + return 8; + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: + case MachineJumpTableInfo::EK_Custom32: + return 4; + case MachineJumpTableInfo::EK_Inline: + return 0; + } + llvm_unreachable("Unknown jump table encoding!"); +} + +/// getEntryAlignment - Return the alignment of each entry in the jump table. 
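+/// For example, on a typical 64-bit target an EK_BlockAddress table needs
+/// the pointer ABI alignment (usually 8), while an EK_LabelDifference32
+/// table only needs the i32 ABI alignment (usually 4).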
+unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
+  // The alignment of a jump table entry is the alignment of int32 unless the
+  // entry is just the address of a block, in which case it is the pointer
+  // alignment.
+  switch (getEntryKind()) {
+  case MachineJumpTableInfo::EK_BlockAddress:
+    return TD.getPointerABIAlignment();
+  case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+    return TD.getABIIntegerTypeAlignment(64);
+  case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+  case MachineJumpTableInfo::EK_LabelDifference32:
+  case MachineJumpTableInfo::EK_Custom32:
+    return TD.getABIIntegerTypeAlignment(32);
+  case MachineJumpTableInfo::EK_Inline:
+    return 1;
+  }
+  llvm_unreachable("Unknown jump table encoding!");
+}
+
+/// createJumpTableIndex - Create a new jump table entry in the jump table info.
+///
+unsigned MachineJumpTableInfo::createJumpTableIndex(
+                               const std::vector<MachineBasicBlock*> &DestBBs) {
+  assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+  JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+  return JumpTables.size()-1;
+}
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+                                                  MachineBasicBlock *New) {
+  assert(Old != New && "Not making a change?");
+  bool MadeChange = false;
+  for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+    if (ReplaceMBBInJumpTable(i, Old, New))
+      MadeChange = true;
+  return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+                                                 MachineBasicBlock *Old,
+                                                 MachineBasicBlock *New) {
+  assert(Old != New && "Not making a change?");
+  bool MadeChange = false;
+  MachineJumpTableEntry &JTE = JumpTables[Idx];
+  for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+    if (JTE.MBBs[j] == Old) {
+      JTE.MBBs[j] = New;
+      MadeChange = true;
+    }
+  return MadeChange;
+}
+
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
+  if (JumpTables.empty()) return;
+
+  OS << "Jump Tables:\n";
+
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+    OS << "  jt#" << i << ": ";
+    for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
+      OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+  }
+
+  OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
+#endif
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+void MachineConstantPoolValue::anchor() { }
+
+Type *MachineConstantPoolEntry::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
+
+
+unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getRelocationInfo();
+  return Val.ConstVal->getRelocationInfo();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (Constants[i].isMachineConstantPoolEntry())
+      delete Constants[i].Val.MachineCPVal;
+  for (DenseSet<MachineConstantPoolValue*>::iterator I =
+       MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
+       I != E; ++I)
+    delete *I;
+}
+
+/// CanShareConstantPoolEntry - Test whether
the given two constants +/// can be allocated the same constant pool entry. +static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, + const DataLayout *TD) { + // Handle the trivial case quickly. + if (A == B) return true; + + // If they have the same type but weren't the same constant, quickly + // reject them. + if (A->getType() == B->getType()) return false; + + // We can't handle structs or arrays. + if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) || + isa<StructType>(B->getType()) || isa<ArrayType>(B->getType())) + return false; + + // For now, only support constants with the same size. + uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); + if (StoreSize != TD->getTypeStoreSize(B->getType()) || + StoreSize > 128) + return false; + + Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); + + // Try constant folding a bitcast of both instructions to an integer. If we + // get two identical ConstantInt's, then we are good to share them. We use + // the constant folding APIs to do this so that we get the benefit of + // DataLayout. + if (isa<PointerType>(A->getType())) + A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, + const_cast<Constant*>(A), TD); + else if (A->getType() != IntTy) + A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, + const_cast<Constant*>(A), TD); + if (isa<PointerType>(B->getType())) + B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, + const_cast<Constant*>(B), TD); + else if (B->getType() != IntTy) + B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, + const_cast<Constant*>(B), TD); + + return A == B; +} + +/// getConstantPoolIndex - Create a new entry in the constant pool or return +/// an existing one. User must specify the log2 of the minimum required +/// alignment for the object. +/// +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, + unsigned Alignment) { + assert(Alignment && "Alignment must be specified!"); + if (Alignment > PoolAlignment) PoolAlignment = Alignment; + + // Check to see if we already have this constant. + // + // FIXME, this could be made much more efficient for large constant pools. + for (unsigned i = 0, e = Constants.size(); i != e; ++i) + if (!Constants[i].isMachineConstantPoolEntry() && + CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) { + if ((unsigned)Constants[i].getAlignment() < Alignment) + Constants[i].Alignment = Alignment; + return i; + } + + Constants.push_back(MachineConstantPoolEntry(C, Alignment)); + return Constants.size()-1; +} + +unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V, + unsigned Alignment) { + assert(Alignment && "Alignment must be specified!"); + if (Alignment > PoolAlignment) PoolAlignment = Alignment; + + // Check to see if we already have this constant. + // + // FIXME, this could be made much more efficient for large constant pools. 
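+  // Unlike the Constant* overload above, de-duplication here is delegated to
+  // the MachineConstantPoolValue subclass via getExistingMachineCPValue().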
+ int Idx = V->getExistingMachineCPValue(this, Alignment); + if (Idx != -1) { + MachineCPVsSharingEntries.insert(V); + return (unsigned)Idx; + } + + Constants.push_back(MachineConstantPoolEntry(V, Alignment)); + return Constants.size()-1; +} + +void MachineConstantPool::print(raw_ostream &OS) const { + if (Constants.empty()) return; + + OS << "Constant Pool:\n"; + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + OS << " cp#" << i << ": "; + if (Constants[i].isMachineConstantPoolEntry()) + Constants[i].Val.MachineCPVal->print(OS); + else + OS << *(const Value*)Constants[i].Val.ConstVal; + OS << ", align=" << Constants[i].getAlignment(); + OS << "\n"; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineConstantPool::dump() const { print(dbgs()); } +#endif diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp new file mode 100644 index 0000000..35591e1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -0,0 +1,57 @@ +//===-- MachineFunctionAnalysis.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionAnalysis members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +using namespace llvm; + +char MachineFunctionAnalysis::ID = 0; + +MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) : + FunctionPass(ID), TM(tm), MF(0) { + initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); +} + +MachineFunctionAnalysis::~MachineFunctionAnalysis() { + releaseMemory(); + assert(!MF && "MachineFunctionAnalysis left initialized!"); +} + +void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); +} + +bool MachineFunctionAnalysis::doInitialization(Module &M) { + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + assert(MMI && "MMI not around yet??"); + MMI->setModule(&M); + NextFnNum = 0; + return false; +} + + +bool MachineFunctionAnalysis::runOnFunction(Function &F) { + assert(!MF && "MachineFunctionAnalysis already initialized!"); + MF = new MachineFunction(&F, TM, NextFnNum++, + getAnalysis<MachineModuleInfo>(), + getAnalysisIfAvailable<GCModuleInfo>()); + return false; +} + +void MachineFunctionAnalysis::releaseMemory() { + delete MF; + MF = 0; +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp new file mode 100644 index 0000000..674cc80 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -0,0 +1,56 @@ +//===-- MachineFunctionPass.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionPass members. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +using namespace llvm; + +Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return createMachineFunctionPrinterPass(O, Banner); +} + +bool MachineFunctionPass::runOnFunction(Function &F) { + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (F.hasAvailableExternallyLinkage()) + return false; + + MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF(); + return runOnMachineFunction(MF); +} + +void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + + // MachineFunctionPass preserves all LLVM IR passes, but there's no + // high-level way to express this. Instead, just list a bunch of + // passes explicitly. This does not include setPreservesCFG, + // because CodeGen overloads that to mean preserving the MachineBasicBlock + // CFG in addition to the LLVM IR CFG. + AU.addPreserved<AliasAnalysis>(); + AU.addPreserved("scalar-evolution"); + AU.addPreserved("iv-users"); + AU.addPreserved("memdep"); + AU.addPreserved("live-values"); + AU.addPreserved("domtree"); + AU.addPreserved("domfrontier"); + AU.addPreserved("loops"); + AU.addPreserved("lda"); + + FunctionPass::getAnalysisUsage(AU); +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp new file mode 100644 index 0000000..fa9c821 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -0,0 +1,67 @@ +//===-- MachineFunctionPrinterPass.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineFunctionPrinterPass implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +/// MachineFunctionPrinterPass - This is a pass to dump the IR of a +/// MachineFunction. 
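+/// A typical way to schedule one manually (sketch):
+///   PM.add(createMachineFunctionPrinterPass(errs(), "After my pass"));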
+/// +struct MachineFunctionPrinterPass : public MachineFunctionPass { + static char ID; + + raw_ostream &OS; + const std::string Banner; + + MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { } + MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) + : MachineFunctionPass(ID), OS(os), Banner(banner) {} + + const char *getPassName() const { return "MachineFunction Printer"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) { + OS << "# " << Banner << ":\n"; + MF.print(OS, getAnalysisIfAvailable<SlotIndexes>()); + return false; + } +}; + +char MachineFunctionPrinterPass::ID = 0; +} + +char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; +INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs", + "Machine Function Printer", false, false) + +namespace llvm { +/// Returns a newly-created MachineFunction Printer pass. The +/// default banner is empty. +/// +MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS, + const std::string &Banner){ + return new MachineFunctionPrinterPass(OS, Banner); +} + +} diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp new file mode 100644 index 0000000..32d0668 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -0,0 +1,1867 @@ +//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Methods common to all machine instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MachineOperand Implementation +//===----------------------------------------------------------------------===// + +void MachineOperand::setReg(unsigned Reg) { + if (getReg() == Reg) return; // No change. + + // Otherwise, we have to change the register. If this operand is embedded + // into a machine function, we need to update the old and new register's + // use/def lists. 
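+  // The nested walk below (operand -> instruction -> block -> function) is
+  // how an operand discovers whether it is embedded in a function at all;
+  // operands of dangling instructions have no use/def lists to maintain.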
+  if (MachineInstr *MI = getParent())
+    if (MachineBasicBlock *MBB = MI->getParent())
+      if (MachineFunction *MF = MBB->getParent()) {
+        MachineRegisterInfo &MRI = MF->getRegInfo();
+        MRI.removeRegOperandFromUseList(this);
+        SmallContents.RegNo = Reg;
+        MRI.addRegOperandToUseList(this);
+        return;
+      }
+
+  // Otherwise, just change the register, no problem. :)
+  SmallContents.RegNo = Reg;
+}
+
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+                                  const TargetRegisterInfo &TRI) {
+  assert(TargetRegisterInfo::isVirtualRegister(Reg));
+  if (SubIdx && getSubReg())
+    SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+  setReg(Reg);
+  if (SubIdx)
+    setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+  if (getSubReg()) {
+    Reg = TRI.getSubReg(Reg, getSubReg());
+    // Note that getSubReg() may return 0 if the sub-register doesn't exist.
+    // That won't happen in legal code.
+    setSubReg(0);
+  }
+  setReg(Reg);
+}
+
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+  assert(isReg() && "Wrong MachineOperand accessor");
+  assert((!Val || !isDebug()) && "Marking a debug operation as def");
+  if (IsDef == Val)
+    return;
+  // MRI may keep uses and defs in different list positions.
+  if (MachineInstr *MI = getParent())
+    if (MachineBasicBlock *MBB = MI->getParent())
+      if (MachineFunction *MF = MBB->getParent()) {
+        MachineRegisterInfo &MRI = MF->getRegInfo();
+        MRI.removeRegOperandFromUseList(this);
+        IsDef = Val;
+        MRI.addRegOperandToUseList(this);
+        return;
+      }
+  IsDef = Val;
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+  assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+  // If this operand is currently a register operand, and if this is in a
+  // function, deregister the operand from the register's use/def list.
+  if (isReg() && isOnRegUseList())
+    if (MachineInstr *MI = getParent())
+      if (MachineBasicBlock *MBB = MI->getParent())
+        if (MachineFunction *MF = MBB->getParent())
+          MF->getRegInfo().removeRegOperandFromUseList(this);
+
+  OpKind = MO_Immediate;
+  Contents.ImmVal = ImmVal;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+                                      bool isKill, bool isDead, bool isUndef,
+                                      bool isDebug) {
+  MachineRegisterInfo *RegInfo = 0;
+  if (MachineInstr *MI = getParent())
+    if (MachineBasicBlock *MBB = MI->getParent())
+      if (MachineFunction *MF = MBB->getParent())
+        RegInfo = &MF->getRegInfo();
+  // If this operand is already a register operand, remove it from the
+  // register's use/def lists.
+  bool WasReg = isReg();
+  if (RegInfo && WasReg)
+    RegInfo->removeRegOperandFromUseList(this);
+
+  // Change this to a register and set the reg#.
+  OpKind = MO_Register;
+  SmallContents.RegNo = Reg;
+  SubReg_TargetFlags = 0;
+  IsDef = isDef;
+  IsImp = isImp;
+  IsKill = isKill;
+  IsDead = isDead;
+  IsUndef = isUndef;
+  IsInternalRead = false;
+  IsEarlyClobber = false;
+  IsDebug = isDebug;
+  // Ensure isOnRegUseList() returns false.
+ Contents.Reg.Prev = 0; + // Preserve the tie when the operand was already a register. + if (!WasReg) + TiedTo = 0; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (RegInfo) + RegInfo->addRegOperandToUseList(this); +} + +/// isIdenticalTo - Return true if this operand is identical to the specified +/// operand. Note that this should stay in sync with the hash_value overload +/// below. +bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { + if (getType() != Other.getType() || + getTargetFlags() != Other.getTargetFlags()) + return false; + + switch (getType()) { + case MachineOperand::MO_Register: + return getReg() == Other.getReg() && isDef() == Other.isDef() && + getSubReg() == Other.getSubReg(); + case MachineOperand::MO_Immediate: + return getImm() == Other.getImm(); + case MachineOperand::MO_CImmediate: + return getCImm() == Other.getCImm(); + case MachineOperand::MO_FPImmediate: + return getFPImm() == Other.getFPImm(); + case MachineOperand::MO_MachineBasicBlock: + return getMBB() == Other.getMBB(); + case MachineOperand::MO_FrameIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); + case MachineOperand::MO_JumpTableIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_GlobalAddress: + return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); + case MachineOperand::MO_ExternalSymbol: + return !strcmp(getSymbolName(), Other.getSymbolName()) && + getOffset() == Other.getOffset(); + case MachineOperand::MO_BlockAddress: + return getBlockAddress() == Other.getBlockAddress() && + getOffset() == Other.getOffset(); + case MO_RegisterMask: + return getRegMask() == Other.getRegMask(); + case MachineOperand::MO_MCSymbol: + return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_Metadata: + return getMetadata() == Other.getMetadata(); + } + llvm_unreachable("Invalid machine operand type"); +} + +// Note: this must stay exactly in sync with isIdenticalTo above. +hash_code llvm::hash_value(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + // Register operands don't have target flags. 
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); + case MachineOperand::MO_Immediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + case MachineOperand::MO_CImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm()); + case MachineOperand::MO_FPImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm()); + case MachineOperand::MO_MachineBasicBlock: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB()); + case MachineOperand::MO_FrameIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), + MO.getOffset()); + case MachineOperand::MO_JumpTableIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ExternalSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), + MO.getSymbolName()); + case MachineOperand::MO_GlobalAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(), + MO.getOffset()); + case MachineOperand::MO_BlockAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getBlockAddress(), MO.getOffset()); + case MachineOperand::MO_RegisterMask: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + case MachineOperand::MO_Metadata: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); + case MachineOperand::MO_MCSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); + } + llvm_unreachable("Invalid machine operand type"); +} + +/// print - Print the specified machine operand. +/// +void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { + // If the instruction is embedded into a basic block, we can find the + // target info for the instruction. + if (!TM) + if (const MachineInstr *MI = getParent()) + if (const MachineBasicBlock *MBB = MI->getParent()) + if (const MachineFunction *MF = MBB->getParent()) + TM = &MF->getTarget(); + const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0; + + switch (getType()) { + case MachineOperand::MO_Register: + OS << PrintReg(getReg(), TRI, getSubReg()); + + if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || + isInternalRead() || isEarlyClobber() || isTied()) { + OS << '<'; + bool NeedComma = false; + if (isDef()) { + if (NeedComma) OS << ','; + if (isEarlyClobber()) + OS << "earlyclobber,"; + if (isImplicit()) + OS << "imp-"; + OS << "def"; + NeedComma = true; + // <def,read-undef> only makes sense when getSubReg() is set. + // Don't clutter the output otherwise. 
+      if (isUndef() && getSubReg())
+        OS << ",read-undef";
+    } else if (isImplicit()) {
+      OS << "imp-use";
+      NeedComma = true;
+    }
+
+    if (isKill()) {
+      if (NeedComma) OS << ',';
+      OS << "kill";
+      NeedComma = true;
+    }
+    if (isDead()) {
+      if (NeedComma) OS << ',';
+      OS << "dead";
+      NeedComma = true;
+    }
+    if (isUndef() && isUse()) {
+      if (NeedComma) OS << ',';
+      OS << "undef";
+      NeedComma = true;
+    }
+    if (isInternalRead()) {
+      if (NeedComma) OS << ',';
+      OS << "internal";
+      NeedComma = true;
+    }
+    if (isTied()) {
+      if (NeedComma) OS << ',';
+      OS << "tied";
+      if (TiedTo != 15)
+        OS << unsigned(TiedTo - 1);
+      NeedComma = true;
+    }
+    OS << '>';
+    }
+    break;
+  case MachineOperand::MO_Immediate:
+    OS << getImm();
+    break;
+  case MachineOperand::MO_CImmediate:
+    getCImm()->getValue().print(OS, false);
+    break;
+  case MachineOperand::MO_FPImmediate:
+    if (getFPImm()->getType()->isFloatTy())
+      OS << getFPImm()->getValueAPF().convertToFloat();
+    else
+      OS << getFPImm()->getValueAPF().convertToDouble();
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    OS << "<BB#" << getMBB()->getNumber() << ">";
+    break;
+  case MachineOperand::MO_FrameIndex:
+    OS << "<fi#" << getIndex() << '>';
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    OS << "<cp#" << getIndex();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << '>';
+    break;
+  case MachineOperand::MO_TargetIndex:
+    OS << "<ti#" << getIndex();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << '>';
+    break;
+  case MachineOperand::MO_JumpTableIndex:
+    OS << "<jt#" << getIndex() << '>';
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    OS << "<ga:";
+    WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
+    if (getOffset()) OS << "+" << getOffset();
+    OS << '>';
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    OS << "<es:" << getSymbolName();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << '>';
+    break;
+  case MachineOperand::MO_BlockAddress:
+    OS << '<';
+    WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+    if (getOffset()) OS << "+" << getOffset();
+    OS << '>';
+    break;
+  case MachineOperand::MO_RegisterMask:
+    OS << "<regmask>";
+    break;
+  case MachineOperand::MO_Metadata:
+    OS << '<';
+    WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+    OS << '>';
+    break;
+  case MachineOperand::MO_MCSymbol:
+    OS << "<MCSym=" << *getMCSymbol() << '>';
+    break;
+  }
+
+  if (unsigned TF = getTargetFlags())
+    OS << "[TF=" << TF << ']';
+}
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+  if (V == 0) return 0;
+  return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool() {
+  return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to
+/// the specified FrameIndex.
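+/// A common pattern when building a load from a spill slot (sketch; FI,
+/// Size and Align stand for values supplied by the caller):
+///   MachineMemOperand *MMO =
+///     MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+///                             MachineMemOperand::MOLoad, Size, Align);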
+MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) { + return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset); +} + +MachinePointerInfo MachinePointerInfo::getJumpTable() { + return MachinePointerInfo(PseudoSourceValue::getJumpTable()); +} + +MachinePointerInfo MachinePointerInfo::getGOT() { + return MachinePointerInfo(PseudoSourceValue::getGOT()); +} + +MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { + return MachinePointerInfo(PseudoSourceValue::getStack(), Offset); +} + +MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, + uint64_t s, unsigned int a, + const MDNode *TBAAInfo, + const MDNode *Ranges) + : PtrInfo(ptrinfo), Size(s), + Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), + TBAAInfo(TBAAInfo), Ranges(Ranges) { + assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) && + "invalid pointer value"); + assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); + assert((isLoad() || isStore()) && "Not a load/store!"); +} + +/// Profile - Gather unique data for the object. +/// +void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(getOffset()); + ID.AddInteger(Size); + ID.AddPointer(getValue()); + ID.AddInteger(Flags); +} + +void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { + // The Value and Offset may differ due to CSE. But the flags and size + // should be the same. + assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); + assert(MMO->getSize() == getSize() && "Size mismatch!"); + + if (MMO->getBaseAlignment() >= getBaseAlignment()) { + // Update the alignment value. + Flags = (Flags & ((1 << MOMaxBits) - 1)) | + ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits); + // Also update the base and offset, because the new alignment may + // not be applicable with the old ones. + PtrInfo = MMO->PtrInfo; + } +} + +/// getAlignment - Return the minimum known alignment in bytes of the +/// actual memory reference. +uint64_t MachineMemOperand::getAlignment() const { + return MinAlign(getBaseAlignment(), getOffset()); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { + assert((MMO.isLoad() || MMO.isStore()) && + "SV has to be a load, store or both."); + + if (MMO.isVolatile()) + OS << "Volatile "; + + if (MMO.isLoad()) + OS << "LD"; + if (MMO.isStore()) + OS << "ST"; + OS << MMO.getSize(); + + // Print the address information. + OS << "["; + if (!MMO.getValue()) + OS << "<unknown>"; + else + WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false); + + // If the alignment of the memory reference itself differs from the alignment + // of the base pointer, print the base alignment explicitly, next to the base + // pointer. + if (MMO.getBaseAlignment() != MMO.getAlignment()) + OS << "(align=" << MMO.getBaseAlignment() << ")"; + + if (MMO.getOffset() != 0) + OS << "+" << MMO.getOffset(); + OS << "]"; + + // Print the alignment of the reference. + if (MMO.getBaseAlignment() != MMO.getAlignment() || + MMO.getBaseAlignment() != MMO.getSize()) + OS << "(align=" << MMO.getAlignment() << ")"; + + // Print TBAA info. + if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) { + OS << "(tbaa="; + if (TBAAInfo->getNumOperands() > 0) + WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false); + else + OS << "<unknown>"; + OS << ")"; + } + + // Print nontemporal info. 
+  if (MMO.isNonTemporal())
+    OS << "(nontemporal)";
+
+  return OS;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
+  if (MCID->ImplicitDefs)
+    for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
+      addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true));
+  if (MCID->ImplicitUses)
+    for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
+      addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified by
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
+                           const DebugLoc dl, bool NoImp)
+  : MCID(&tid), Parent(0), Operands(0), NumOperands(0),
+    Flags(0), AsmPrinterFlags(0),
+    NumMemRefs(0), MemRefs(0), debugLoc(dl) {
+  // Reserve space for the expected number of operands.
+  if (unsigned NumOps = MCID->getNumOperands() +
+    MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) {
+    CapOperands = OperandCapacity::get(NumOps);
+    Operands = MF.allocateOperandArray(CapOperands);
+  }
+
+  if (!NoImp)
+    addImplicitDefUseOperands(MF);
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+  : MCID(&MI.getDesc()), Parent(0), Operands(0), NumOperands(0),
+    Flags(0), AsmPrinterFlags(0),
+    NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
+    debugLoc(MI.getDebugLoc()) {
+  CapOperands = OperandCapacity::get(MI.getNumOperands());
+  Operands = MF.allocateOperandArray(CapOperands);
+
+  // Copy operands.
+  for (unsigned i = 0; i != MI.getNumOperands(); ++i)
+    addOperand(MF, MI.getOperand(i));
+
+  // Copy all the sensible flags.
+  setFlags(MI.Flags);
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+  if (MachineBasicBlock *MBB = getParent())
+    return &MBB->getParent()->getRegInfo();
+  return 0;
+}
+
+/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands already be on their use lists.
+void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+    if (Operands[i].isReg())
+      MRI.removeRegOperandFromUseList(&Operands[i]);
+}
+
+/// AddRegOperandsToUseLists - Add all of the register operands in
+/// this instruction to their respective use lists. This requires that the
+/// operands not be on their use lists yet.
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+    if (Operands[i].isReg())
+      MRI.addRegOperandToUseList(&Operands[i]);
+}
+
+void MachineInstr::addOperand(const MachineOperand &Op) {
+  MachineBasicBlock *MBB = getParent();
+  assert(MBB && "Use MachineInstrBuilder to add operands to dangling instrs");
+  MachineFunction *MF = MBB->getParent();
+  assert(MF && "Use MachineInstrBuilder to add operands to dangling instrs");
+  addOperand(*MF, Op);
+}
+
+/// Move NumOps MachineOperands from Src to Dst, with support for overlapping
+/// ranges. If MRI is non-null also update use-def chains.
+static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
+                         unsigned NumOps, MachineRegisterInfo *MRI) {
+  if (MRI)
+    return MRI->moveOperands(Dst, Src, NumOps);
+
+  // Here it would be convenient to call memmove, but that isn't allowed because
+  // MachineOperand has a constructor and so isn't a POD type.
+  if (Dst < Src)
+    for (unsigned i = 0; i != NumOps; ++i)
+      new (Dst + i) MachineOperand(Src[i]);
+  else
+    for (unsigned i = NumOps; i ; --i)
+      new (Dst + i - 1) MachineOperand(Src[i - 1]);
+}
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
+  assert(MCID && "Cannot add operands before providing an instr descriptor");
+
+  // Check if we're adding one of our existing operands.
+  if (&Op >= Operands && &Op < Operands + NumOperands) {
+    // This is unusual: MI->addOperand(MI->getOperand(i)).
+    // If adding Op requires reallocating or moving existing operands around,
+    // the Op reference could go stale. Support it by copying Op.
+    MachineOperand CopyOp(Op);
+    return addOperand(MF, CopyOp);
+  }
+
+  // Find the insert location for the new operand. Implicit registers go at
+  // the end, everything else goes before the implicit regs.
+  //
+  // FIXME: Allow mixed explicit and implicit operands on inline asm.
+  // InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as
+  // implicit-defs, but they must not be moved around. See the FIXME in
+  // InstrEmitter.cpp.
+  unsigned OpNo = getNumOperands();
+  bool isImpReg = Op.isReg() && Op.isImplicit();
+  if (!isImpReg && !isInlineAsm()) {
+    while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
+      --OpNo;
+      assert(!Operands[OpNo].isTied() && "Cannot move tied operands");
+    }
+  }
+
+  // OpNo now points to the desired insertion point. Unless this is a variadic
+  // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
+  // RegMask operands go between the explicit and implicit operands.
+  assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
+          OpNo < MCID->getNumOperands()) &&
+         "Trying to add an operand to a machine instr that is already done!");
+
+  MachineRegisterInfo *MRI = getRegInfo();
+
+  // Determine if the Operands array needs to be reallocated.
+  // Save the old capacity and operand array.
+  OperandCapacity OldCap = CapOperands;
+  MachineOperand *OldOperands = Operands;
+  if (!OldOperands || OldCap.getSize() == getNumOperands()) {
+    CapOperands = OldOperands ? OldCap.getNext() : OldCap.get(1);
+    Operands = MF.allocateOperandArray(CapOperands);
+    // Move the operands before the insertion point.
+ if (OpNo) + moveOperands(Operands, OldOperands, OpNo, MRI); + } + + // Move the operands following the insertion point. + if (OpNo != NumOperands) + moveOperands(Operands + OpNo + 1, OldOperands + OpNo, NumOperands - OpNo, + MRI); + ++NumOperands; + + // Deallocate the old operand array. + if (OldOperands != Operands && OldOperands) + MF.deallocateOperandArray(OldCap, OldOperands); + + // Copy Op into place. It still needs to be inserted into the MRI use lists. + MachineOperand *NewMO = new (Operands + OpNo) MachineOperand(Op); + NewMO->ParentMI = this; + + // When adding a register operand, tell MRI about it. + if (NewMO->isReg()) { + // Ensure isOnRegUseList() returns false, regardless of Op's status. + NewMO->Contents.Reg.Prev = 0; + // Ignore existing ties. This is not a property that can be copied. + NewMO->TiedTo = 0; + // Add the new operand to MRI, but only for instructions in an MBB. + if (MRI) + MRI->addRegOperandToUseList(NewMO); + // The MCID operand information isn't accurate until we start adding + // explicit operands. The implicit operands are added first, then the + // explicits are inserted before them. + if (!isImpReg) { + // Tie uses to defs as indicated in MCInstrDesc. + if (NewMO->isUse()) { + int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO); + if (DefIdx != -1) + tieOperands(DefIdx, OpNo); + } + // If the register operand is flagged as early, mark the operand as such. + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) + NewMO->setIsEarlyClobber(true); + } + } +} + +/// RemoveOperand - Erase an operand from an instruction, leaving it with one +/// fewer operand than it started with. +/// +void MachineInstr::RemoveOperand(unsigned OpNo) { + assert(OpNo < getNumOperands() && "Invalid operand number"); + untieRegOperand(OpNo); + +#ifndef NDEBUG + // Moving tied operands would break the ties. + for (unsigned i = OpNo + 1, e = getNumOperands(); i != e; ++i) + if (Operands[i].isReg()) + assert(!Operands[i].isTied() && "Cannot move tied operands"); +#endif + + MachineRegisterInfo *MRI = getRegInfo(); + if (MRI && Operands[OpNo].isReg()) + MRI->removeRegOperandFromUseList(Operands + OpNo); + + // Don't call the MachineOperand destructor. A lot of this code depends on + // MachineOperand having a trivial destructor anyway, and adding a call here + // wouldn't make it 'destructor-correct'. + + if (unsigned N = NumOperands - 1 - OpNo) + moveOperands(Operands + OpNo, Operands + OpNo + 1, N, MRI); + --NumOperands; +} + +/// addMemOperand - Add a MachineMemOperand to the machine instruction. +/// This function should be used only occasionally. The setMemRefs function +/// is the primary method for setting up a MachineInstr's MemRefs list. 
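+/// Each call allocates a fresh array and copies all existing memrefs, so
+/// appending N operands one at a time is quadratic in N.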
+void MachineInstr::addMemOperand(MachineFunction &MF,
+                                 MachineMemOperand *MO) {
+  mmo_iterator OldMemRefs = MemRefs;
+  unsigned OldNumMemRefs = NumMemRefs;
+
+  unsigned NewNum = NumMemRefs + 1;
+  mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+
+  std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
+  NewMemRefs[NewNum - 1] = MO;
+  setMemRefs(NewMemRefs, NewMemRefs + NewNum);
+}
+
+bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
+  assert(!isBundledWithPred() && "Must be called on bundle header");
+  for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) {
+    if (MII->getDesc().getFlags() & Mask) {
+      if (Type == AnyInBundle)
+        return true;
+    } else {
+      if (Type == AllInBundle && !MII->isBundle())
+        return false;
+    }
+    // This was the last instruction in the bundle.
+    if (!MII->isBundledWithSucc())
+      return Type == AllInBundle;
+  }
+}
+
+bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
+                                 MICheckType Check) const {
+  // If opcodes or number of operands are not the same then the two
+  // instructions are obviously not identical.
+  if (Other->getOpcode() != getOpcode() ||
+      Other->getNumOperands() != getNumOperands())
+    return false;
+
+  if (isBundle()) {
+    // Both instructions are bundles, compare MIs inside the bundle.
+    MachineBasicBlock::const_instr_iterator I1 = *this;
+    MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
+    MachineBasicBlock::const_instr_iterator I2 = *Other;
+    MachineBasicBlock::const_instr_iterator E2 =
+      Other->getParent()->instr_end();
+    while (++I1 != E1 && I1->isInsideBundle()) {
+      ++I2;
+      if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check))
+        return false;
+    }
+  }
+
+  // Check operands to make sure they match.
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    const MachineOperand &OMO = Other->getOperand(i);
+    if (!MO.isReg()) {
+      if (!MO.isIdenticalTo(OMO))
+        return false;
+      continue;
+    }
+
+    // Clients may or may not want to ignore defs when testing for equality.
+    // For example, machine CSE pass only cares about finding common
+    // subexpressions, so it's safe to ignore virtual register defs.
+    if (MO.isDef()) {
+      if (Check == IgnoreDefs)
+        continue;
+      else if (Check == IgnoreVRegDefs) {
+        if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+            TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+          if (MO.getReg() != OMO.getReg())
+            return false;
+      } else {
+        if (!MO.isIdenticalTo(OMO))
+          return false;
+        if (Check == CheckKillDead && MO.isDead() != OMO.isDead())
+          return false;
+      }
+    } else {
+      if (!MO.isIdenticalTo(OMO))
+        return false;
+      if (Check == CheckKillDead && MO.isKill() != OMO.isKill())
+        return false;
+    }
+  }
+  // If DebugLoc does not match then two dbg.values are not identical.
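+  // Keeping DBG_VALUEs with different source locations distinct matters for
+  // passes that compare instructions via isIdenticalTo (e.g. tail merging):
+  // two updates of the same variable must not be collapsed into one.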
+ if (isDebugValue()) + if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown() + && getDebugLoc() != Other->getDebugLoc()) + return false; + return true; +} + +MachineInstr *MachineInstr::removeFromParent() { + assert(getParent() && "Not embedded in a basic block!"); + return getParent()->remove(this); +} + +MachineInstr *MachineInstr::removeFromBundle() { + assert(getParent() && "Not embedded in a basic block!"); + return getParent()->remove_instr(this); +} + +void MachineInstr::eraseFromParent() { + assert(getParent() && "Not embedded in a basic block!"); + getParent()->erase(this); +} + +void MachineInstr::eraseFromBundle() { + assert(getParent() && "Not embedded in a basic block!"); + getParent()->erase_instr(this); +} + +/// getNumExplicitOperands - Returns the number of non-implicit operands. +/// +unsigned MachineInstr::getNumExplicitOperands() const { + unsigned NumOperands = MCID->getNumOperands(); + if (!MCID->isVariadic()) + return NumOperands; + + for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isImplicit()) + NumOperands++; + } + return NumOperands; +} + +void MachineInstr::bundleWithPred() { + assert(!isBundledWithPred() && "MI is already bundled with its predecessor"); + setFlag(BundledPred); + MachineBasicBlock::instr_iterator Pred = this; + --Pred; + assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags"); + Pred->setFlag(BundledSucc); +} + +void MachineInstr::bundleWithSucc() { + assert(!isBundledWithSucc() && "MI is already bundled with its successor"); + setFlag(BundledSucc); + MachineBasicBlock::instr_iterator Succ = this; + ++Succ; + assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags"); + Succ->setFlag(BundledPred); +} + +void MachineInstr::unbundleFromPred() { + assert(isBundledWithPred() && "MI isn't bundled with its predecessor"); + clearFlag(BundledPred); + MachineBasicBlock::instr_iterator Pred = this; + --Pred; + assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags"); + Pred->clearFlag(BundledSucc); +} + +void MachineInstr::unbundleFromSucc() { + assert(isBundledWithSucc() && "MI isn't bundled with its successor"); + clearFlag(BundledSucc); + MachineBasicBlock::instr_iterator Succ = this; + ++Succ; + assert(Succ->isBundledWithPred() && "Inconsistent bundle flags"); + Succ->clearFlag(BundledPred); +} + +bool MachineInstr::isStackAligningInlineAsm() const { + if (isInlineAsm()) { + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + return true; + } + return false; +} + +InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const { + assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!"); + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0); +} + +int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx, + unsigned *GroupNo) const { + assert(isInlineAsm() && "Expected an inline asm instruction"); + assert(OpIdx < getNumOperands() && "OpIdx out of range"); + + // Ignore queries about the initial operands. + if (OpIdx < InlineAsm::MIOp_FirstOperand) + return -1; + + unsigned Group = 0; + unsigned NumOps; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e; + i += NumOps) { + const MachineOperand &FlagMO = getOperand(i); + // If we reach the implicit register operands, stop looking. 
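+    // Each operand group starts with an immediate flag word that encodes the
+    // operand kind and the number of register operands following it; that
+    // count lets the loop step from one group descriptor to the next.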
+    if (!FlagMO.isImm())
+      return -1;
+    NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+    if (i + NumOps > OpIdx) {
+      if (GroupNo)
+        *GroupNo = Group;
+      return i;
+    }
+    ++Group;
+  }
+  return -1;
+}
+
+const TargetRegisterClass*
+MachineInstr::getRegClassConstraint(unsigned OpIdx,
+                                    const TargetInstrInfo *TII,
+                                    const TargetRegisterInfo *TRI) const {
+  assert(getParent() && "Can't have an MBB reference here!");
+  assert(getParent()->getParent() && "Can't have an MF reference here!");
+  const MachineFunction &MF = *getParent()->getParent();
+
+  // Most opcodes have fixed constraints in their MCInstrDesc.
+  if (!isInlineAsm())
+    return TII->getRegClass(getDesc(), OpIdx, TRI, MF);
+
+  if (!getOperand(OpIdx).isReg())
+    return NULL;
+
+  // For tied uses on inline asm, get the constraint from the def.
+  unsigned DefIdx;
+  if (getOperand(OpIdx).isUse() && isRegTiedToDefOperand(OpIdx, &DefIdx))
+    OpIdx = DefIdx;
+
+  // Inline asm stores register class constraints in the flag word.
+  int FlagIdx = findInlineAsmFlagIdx(OpIdx);
+  if (FlagIdx < 0)
+    return NULL;
+
+  unsigned Flag = getOperand(FlagIdx).getImm();
+  unsigned RCID;
+  if (InlineAsm::hasRegClassConstraint(Flag, RCID))
+    return TRI->getRegClass(RCID);
+
+  // Assume that all registers in a memory operand are pointers.
+  if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
+    return TRI->getPointerRegClass(MF);
+
+  return NULL;
+}
+
+/// Return the number of instructions inside the MI bundle, not counting the
+/// header instruction.
+unsigned MachineInstr::getBundleSize() const {
+  MachineBasicBlock::const_instr_iterator I = this;
+  unsigned Size = 0;
+  while (I->isBundledWithSucc())
+    ++Size, ++I;
+  return Size;
+}
+
+/// findRegisterUseOperandIdx() - Returns the index of the operand that is a
+/// use of the specified register, or -1 if it is not found. It further
+/// tightens the search criteria to a use that kills the register if isKill
+/// is true.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+                                      const TargetRegisterInfo *TRI) const {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (!MOReg)
+      continue;
+    if (MOReg == Reg ||
+        (TRI &&
+         TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+         TargetRegisterInfo::isPhysicalRegister(Reg) &&
+         TRI->isSubRegister(MOReg, Reg)))
+      if (!isKill || MO.isKill())
+        return i;
+  }
+  return -1;
+}
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+                                         SmallVectorImpl<unsigned> *Ops) const {
+  bool PartDef = false; // Partial redefine.
+  bool FullDef = false; // Full define.
+  bool Use = false;
+
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || MO.getReg() != Reg)
+      continue;
+    if (Ops)
+      Ops->push_back(i);
+    if (MO.isUse())
+      Use |= !MO.isUndef();
+    else if (MO.getSubReg() && !MO.isUndef())
+      // A partial <def,undef> doesn't count as reading the register.
+      PartDef = true;
+    else
+      FullDef = true;
+  }
+  // A partial redefine uses Reg unless there is also a full define.
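+  // E.g. a def of a sub-register without <undef> writes only some lanes and
+  // implicitly reads the remaining lanes of Reg, so it counts as both.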
+  return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int
+MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+                                        const TargetRegisterInfo *TRI) const {
+  bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    // Accept regmask operands when Overlap is set.
+    // Ignore them when looking for a specific def operand (Overlap == false).
+    if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg))
+      return i;
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned MOReg = MO.getReg();
+    bool Found = (MOReg == Reg);
+    if (!Found && TRI && isPhys &&
+        TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+      if (Overlap)
+        Found = TRI->regsOverlap(MOReg, Reg);
+      else
+        Found = TRI->isSubRegister(MOReg, Reg);
+    }
+    if (Found && (!isDead || MO.isDead()))
+      return i;
+  }
+  return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+  // Don't call MCID.findFirstPredOperandIdx() because this variant
+  // is sometimes called on an instruction that's not yet complete, and
+  // so the number of operands is less than the MCID indicates. In
+  // particular, the PTX target does this.
+  const MCInstrDesc &MCID = getDesc();
+  if (MCID.isPredicable()) {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if (MCID.OpInfo[i].isPredicate())
+        return i;
+  }
+
+  return -1;
+}
+
+// MachineOperand::TiedTo is 4 bits wide.
+const unsigned TiedMax = 15;
+
+/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other.
+///
+/// Use and def operands can be tied together, indicated by a non-zero TiedTo
+/// field. TiedTo can have these values:
+///
+/// 0: Operand is not tied to anything.
+/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1).
+/// TiedMax: Tied to an operand >= TiedMax-1.
+///
+/// The tied def must be one of the first TiedMax operands on a normal
+/// instruction. INLINEASM instructions allow more tied defs.
+///
+void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) {
+  MachineOperand &DefMO = getOperand(DefIdx);
+  MachineOperand &UseMO = getOperand(UseIdx);
+  assert(DefMO.isDef() && "DefIdx must be a def operand");
+  assert(UseMO.isUse() && "UseIdx must be a use operand");
+  assert(!DefMO.isTied() && "Def is already tied to another use");
+  assert(!UseMO.isTied() && "Use is already tied to another def");
+
+  if (DefIdx < TiedMax)
+    UseMO.TiedTo = DefIdx + 1;
+  else {
+    // Inline asm can use the group descriptors to find tied operands, but on
+    // a normal instruction, the tied def must be within the first TiedMax
+    // operands.
+    assert(isInlineAsm() && "DefIdx out of range");
+    UseMO.TiedTo = TiedMax;
+  }
+
+  // UseIdx can be out of range; we'll search for it in findTiedOperandIdx().
+  DefMO.TiedTo = std::min(UseIdx + 1, TiedMax);
+}
+
+/// Given the index of a tied register operand, find the operand it is tied to.
+/// Defs are tied to uses and vice versa. Returns the index of the tied operand
+/// which must exist.
+unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { + const MachineOperand &MO = getOperand(OpIdx); + assert(MO.isTied() && "Operand isn't tied"); + + // Normally TiedTo is in range. + if (MO.TiedTo < TiedMax) + return MO.TiedTo - 1; + + // Uses on normal instructions can be out of range. + if (!isInlineAsm()) { + // Normal tied defs must be in the 0..TiedMax-1 range. + if (MO.isUse()) + return TiedMax - 1; + // MO is a def. Search for the tied use. + for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) { + const MachineOperand &UseMO = getOperand(i); + if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1) + return i; + } + llvm_unreachable("Can't find tied use"); + } + + // Now deal with inline asm by parsing the operand group descriptor flags. + // Find the beginning of each operand group. + SmallVector<unsigned, 8> GroupIdx; + unsigned OpIdxGroup = ~0u; + unsigned NumOps; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e; + i += NumOps) { + const MachineOperand &FlagMO = getOperand(i); + assert(FlagMO.isImm() && "Invalid tied operand on inline asm"); + unsigned CurGroup = GroupIdx.size(); + GroupIdx.push_back(i); + NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm()); + // OpIdx belongs to this operand group. + if (OpIdx > i && OpIdx < i + NumOps) + OpIdxGroup = CurGroup; + unsigned TiedGroup; + if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup)) + continue; + // Operands in this group are tied to operands in TiedGroup which must be + // earlier. Find the number of operands between the two groups. + unsigned Delta = i - GroupIdx[TiedGroup]; + + // OpIdx is a use tied to TiedGroup. + if (OpIdxGroup == CurGroup) + return OpIdx - Delta; + + // OpIdx is a def tied to this use group. + if (OpIdxGroup == TiedGroup) + return OpIdx + Delta; + } + llvm_unreachable("Invalid tied operand on inline asm"); +} + +/// clearKillInfo - Clears kill flags on all operands. +/// +void MachineInstr::clearKillInfo() { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.isUse()) + MO.setIsKill(false); + } +} + +void MachineInstr::substituteRegister(unsigned FromReg, + unsigned ToReg, + unsigned SubIdx, + const TargetRegisterInfo &RegInfo) { + if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (SubIdx) + ToReg = RegInfo.getSubReg(ToReg, SubIdx); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.getReg() != FromReg) + continue; + MO.substPhysReg(ToReg, RegInfo); + } + } else { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.getReg() != FromReg) + continue; + MO.substVirtReg(ToReg, SubIdx, RegInfo); + } + } +} + +/// isSafeToMove - Return true if it is safe to move this instruction. If +/// SawStore is set to true, it means that there is a store (or call) between +/// the instruction's location and its intended destination. +bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, + AliasAnalysis *AA, + bool &SawStore) const { + // Ignore stuff that we obviously can't move. + // + // Treat volatile loads as stores. This is not strictly necessary for + // volatiles, but it is required for atomic loads. It is not allowed to move + // a load across an atomic load with Ordering > Monotonic. 
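+  // Setting SawStore here also tells the caller that no later load may be
+  // moved across this instruction.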
+  if (mayStore() || isCall() ||
+      (mayLoad() && hasOrderedMemoryRef())) {
+    SawStore = true;
+    return false;
+  }
+
+  if (isLabel() || isDebugValue() ||
+      isTerminator() || hasUnmodeledSideEffects())
+    return false;
+
+  // See if this instruction does a load. If so, we have to guarantee that the
+  // loaded value doesn't change between the load and its intended destination.
+  // The check for isInvariantLoad gives the target the chance to classify the
+  // load as always returning a constant, e.g. a constant pool load.
+  if (mayLoad() && !isInvariantLoad(AA))
+    // Otherwise, this is a real load. If there is a store between the load and
+    // end of block, we can't move it.
+    return !SawStore;
+
+  return true;
+}
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+                                 AliasAnalysis *AA,
+                                 unsigned DstReg) const {
+  bool SawStore = false;
+  if (!TII->isTriviallyReMaterializable(this, AA) ||
+      !isSafeToMove(TII, AA, SawStore))
+    return false;
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg())
+      continue;
+    // FIXME: For now, do not remat any instruction with register operands.
+    // Later on, we can loosen the restriction if the register operands have
+    // not been modified between the def and use. Note, this is different from
+    // MachineSink because the code is no longer in two-address form (at least
+    // partially).
+    if (MO.isUse())
+      return false;
+    else if (!MO.isDead() && MO.getReg() != DstReg)
+      return false;
+  }
+  return true;
+}
+
+/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
+/// or volatile memory reference, or if the information describing the memory
+/// reference is not available. Return false if it is known to have no ordered
+/// memory references.
+bool MachineInstr::hasOrderedMemoryRef() const {
+  // An instruction known never to access memory won't have a volatile access.
+  if (!mayStore() &&
+      !mayLoad() &&
+      !isCall() &&
+      !hasUnmodeledSideEffects())
+    return false;
+
+  // Otherwise, if the instruction has no memory reference information,
+  // conservatively assume it wasn't preserved.
+  if (memoperands_empty())
+    return true;
+
+  // Check the memory reference information for ordered references.
+  for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
+    if (!(*I)->isUnordered())
+      return true;
+
+  return false;
+}
+
+/// isInvariantLoad - Return true if this instruction is loading from a
+/// location whose value is invariant across the function. For example,
+/// loading a value from the constant pool or from the argument area
+/// of a function if it does not change. This should only return true if
+/// *all* loads the instruction does are invariant (if it does multiple loads).
+bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+  // If the instruction doesn't load at all, it isn't an invariant load.
+  if (!mayLoad())
+    return false;
+
+  // If the instruction has lost its memoperands, conservatively assume that
+  // it may not be an invariant load.
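+  // Memoperands can be dropped when instructions are cloned or folded, so an
+  // empty list means "unknown", not "no memory access".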
+ if (memoperands_empty()) + return false; + + const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo(); + + for (mmo_iterator I = memoperands_begin(), + E = memoperands_end(); I != E; ++I) { + if ((*I)->isVolatile()) return false; + if ((*I)->isStore()) return false; + if ((*I)->isInvariant()) return true; + + if (const Value *V = (*I)->getValue()) { + // A load from a constant PseudoSourceValue is invariant. + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) + if (PSV->isConstant(MFI)) + continue; + // If we have an AliasAnalysis, ask it whether the memory is constant. + if (AA && AA->pointsToConstantMemory( + AliasAnalysis::Location(V, (*I)->getSize(), + (*I)->getTBAAInfo()))) + continue; + } + + // Otherwise assume conservatively. + return false; + } + + // Everything checks out. + return true; +} + +/// isConstantValuePHI - If the specified instruction is a PHI that always +/// merges together the same virtual register, return the register, otherwise +/// return 0. +unsigned MachineInstr::isConstantValuePHI() const { + if (!isPHI()) + return 0; + assert(getNumOperands() >= 3 && + "It's illegal to have a PHI without source operands"); + + unsigned Reg = getOperand(1).getReg(); + for (unsigned i = 3, e = getNumOperands(); i < e; i += 2) + if (getOperand(i).getReg() != Reg) + return 0; + return Reg; +} + +bool MachineInstr::hasUnmodeledSideEffects() const { + if (hasProperty(MCID::UnmodeledSideEffects)) + return true; + if (isInlineAsm()) { + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_HasSideEffects) + return true; + } + + return false; +} + +/// allDefsAreDead - Return true if all the defs of this instruction are dead. +/// +bool MachineInstr::allDefsAreDead() const { + for (unsigned i = 0, e = getNumOperands(); i < e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.isUse()) + continue; + if (!MO.isDead()) + return false; + } + return true; +} + +/// copyImplicitOps - Copy implicit register operands from specified +/// instruction to this instruction. +void MachineInstr::copyImplicitOps(MachineFunction &MF, + const MachineInstr *MI) { + for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + addOperand(MF, MO); + } +} + +void MachineInstr::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dbgs() << " " << *this; +#endif +} + +static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, + raw_ostream &CommentOS) { + const LLVMContext &Ctx = MF->getFunction()->getContext(); + if (!DL.isUnknown()) { // Print source line info. + DIScope Scope(DL.getScope(Ctx)); + // Omit the directory, because it's likely to be long and uninteresting. + if (Scope.Verify()) + CommentOS << Scope.getFilename(); + else + CommentOS << "<unknown>"; + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, MF, CommentOS); + CommentOS << " ]"; + } + } +} + +void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, + bool SkipOpers) const { + // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. 
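+  // print() is also used on instructions that are not inserted into a basic
+  // block yet, so each of the context pointers below may legitimately be null.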
+ const MachineFunction *MF = 0; + const MachineRegisterInfo *MRI = 0; + if (const MachineBasicBlock *MBB = getParent()) { + MF = MBB->getParent(); + if (!TM && MF) + TM = &MF->getTarget(); + if (MF) + MRI = &MF->getRegInfo(); + } + + // Save a list of virtual registers. + SmallVector<unsigned, 8> VirtRegs; + + // Print explicitly defined operands on the left of an assignment syntax. + unsigned StartOp = 0, e = getNumOperands(); + for (; StartOp < e && getOperand(StartOp).isReg() && + getOperand(StartOp).isDef() && + !getOperand(StartOp).isImplicit(); + ++StartOp) { + if (StartOp != 0) OS << ", "; + getOperand(StartOp).print(OS, TM); + unsigned Reg = getOperand(StartOp).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + VirtRegs.push_back(Reg); + } + + if (StartOp != 0) + OS << " = "; + + // Print the opcode name. + if (TM && TM->getInstrInfo()) + OS << TM->getInstrInfo()->getName(getOpcode()); + else + OS << "UNKNOWN"; + + if (SkipOpers) + return; + + // Print the rest of the operands. + bool OmittedAnyCallClobbers = false; + bool FirstOp = true; + unsigned AsmDescOp = ~0u; + unsigned AsmOpCount = 0; + + if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) { + // Print asm string. + OS << " "; + getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); + + // Print HasSideEffects, MayLoad, MayStore, IsAlignStack + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_HasSideEffects) + OS << " [sideeffect]"; + if (ExtraInfo & InlineAsm::Extra_MayLoad) + OS << " [mayload]"; + if (ExtraInfo & InlineAsm::Extra_MayStore) + OS << " [maystore]"; + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + OS << " [alignstack]"; + if (getInlineAsmDialect() == InlineAsm::AD_ATT) + OS << " [attdialect]"; + if (getInlineAsmDialect() == InlineAsm::AD_Intel) + OS << " [inteldialect]"; + + StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand; + FirstOp = false; + } + + + for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + VirtRegs.push_back(MO.getReg()); + + // Omit call-clobbered registers which aren't used anywhere. This makes + // call instructions much less noisy on targets where calls clobber lots + // of registers. Don't rely on MO.isDead() because we may be called before + // LiveVariables is run, or we may be looking at a non-allocatable reg. + if (MF && isCall() && + MO.isReg() && MO.isImplicit() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + const MachineRegisterInfo &MRI = MF->getRegInfo(); + if (MRI.use_empty(Reg)) { + bool HasAliasLive = false; + for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true); + AI.isValid(); ++AI) { + unsigned AliasReg = *AI; + if (!MRI.use_empty(AliasReg)) { + HasAliasLive = true; + break; + } + } + if (!HasAliasLive) { + OmittedAnyCallClobbers = true; + continue; + } + } + } + } + + if (FirstOp) FirstOp = false; else OS << ","; + OS << " "; + if (i < getDesc().NumOperands) { + const MCOperandInfo &MCOI = getDesc().OpInfo[i]; + if (MCOI.isPredicate()) + OS << "pred:"; + if (MCOI.isOptionalDef()) + OS << "opt:"; + } + if (isDebugValue() && MO.isMetadata()) { + // Pretty print DBG_VALUE instructions. 
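+      // Operand 2 of the DIVariable node holds the variable's name string.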
+ const MDNode *MD = MO.getMetadata(); + if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2))) + OS << "!\"" << MDS->getString() << '\"'; + else + MO.print(OS, TM); + } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm()); + } else if (i == AsmDescOp && MO.isImm()) { + // Pretty print the inline asm operand descriptor. + OS << '$' << AsmOpCount++; + unsigned Flag = MO.getImm(); + switch (InlineAsm::getKind(Flag)) { + case InlineAsm::Kind_RegUse: OS << ":[reguse"; break; + case InlineAsm::Kind_RegDef: OS << ":[regdef"; break; + case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break; + case InlineAsm::Kind_Clobber: OS << ":[clobber"; break; + case InlineAsm::Kind_Imm: OS << ":[imm"; break; + case InlineAsm::Kind_Mem: OS << ":[mem"; break; + default: OS << ":[??" << InlineAsm::getKind(Flag); break; + } + + unsigned RCID = 0; + if (InlineAsm::hasRegClassConstraint(Flag, RCID)) { + if (TM) + OS << ':' << TM->getRegisterInfo()->getRegClass(RCID)->getName(); + else + OS << ":RC" << RCID; + } + + unsigned TiedTo = 0; + if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo)) + OS << " tiedto:$" << TiedTo; + + OS << ']'; + + // Compute the index of the next operand descriptor. + AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); + } else + MO.print(OS, TM); + } + + // Briefly indicate whether any call clobbers were omitted. + if (OmittedAnyCallClobbers) { + if (!FirstOp) OS << ","; + OS << " ..."; + } + + bool HaveSemi = false; + const unsigned PrintableFlags = FrameSetup; + if (Flags & PrintableFlags) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + OS << " flags: "; + + if (Flags & FrameSetup) + OS << "FrameSetup"; + } + + if (!memoperands_empty()) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + + OS << " mem:"; + for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); + i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + } + + // Print the regclass of any virtual registers encountered. + if (MRI && !VirtRegs.empty()) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + for (unsigned i = 0; i != VirtRegs.size(); ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); + OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]); + for (unsigned j = i+1; j != VirtRegs.size();) { + if (MRI->getRegClass(VirtRegs[j]) != RC) { + ++j; + continue; + } + if (VirtRegs[i] != VirtRegs[j]) + OS << "," << PrintReg(VirtRegs[j]); + VirtRegs.erase(VirtRegs.begin()+j); + } + } + } + + // Print debug location information. 
+ if (isDebugValue() && getOperand(e - 1).isMetadata()) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + DIVariable DV(getOperand(e - 1).getMetadata()); + OS << " line no:" << DV.getLineNumber(); + if (MDNode *InlinedAt = DV.getInlinedAt()) { + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); + if (!InlinedAtDL.isUnknown()) { + OS << " inlined @[ "; + printDebugLoc(InlinedAtDL, MF, OS); + OS << " ]"; + } + } + } else if (!debugLoc.isUnknown() && MF) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + OS << " dbg:"; + printDebugLoc(debugLoc, MF, OS); + } + + OS << '\n'; +} + +bool MachineInstr::addRegisterKilled(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && + MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); + bool Found = false; + SmallVector<unsigned,4> DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + if (!Found) { + if (MO.isKill()) + // The register is already marked kill. + return true; + if (isPhysReg && isRegTiedToDefOperand(i)) + // Two-address uses of physregs must not be marked kill. + return true; + MO.setIsKill(); + Found = true; + } + } else if (hasAliases && MO.isKill() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // A super-register kill already exists. + if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded kill operands. + while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsKill(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is killed. Add a + // new implicit operand if required. + if (!Found && AddIfNotFound) { + addOperand(MachineOperand::CreateReg(IncomingReg, + false /*IsDef*/, + true /*IsImp*/, + true /*IsKill*/)); + return true; + } + return Found; +} + +void MachineInstr::clearRegisterKills(unsigned Reg, + const TargetRegisterInfo *RegInfo) { + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + RegInfo = 0; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned OpReg = MO.getReg(); + if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg))) + MO.setIsKill(false); + } +} + +bool MachineInstr::addRegisterDead(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && + MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); + bool Found = false; + SmallVector<unsigned,4> DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + MO.setIsDead(); + Found = true; + } else if (hasAliases && MO.isDead() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // There exists a super-register that's marked dead. 
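+      // A dead super-register already covers IncomingReg; dead sub-register
+      // operands become redundant and are queued in DeadOps for trimming.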
+ if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded dead operands. + while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsDead(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is dead. Add a + // new implicit operand if required. + if (Found || !AddIfNotFound) + return Found; + + addOperand(MachineOperand::CreateReg(IncomingReg, + true /*IsDef*/, + true /*IsImp*/, + false /*IsKill*/, + true /*IsDead*/)); + return true; +} + +void MachineInstr::addRegisterDefined(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo) { + if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) { + MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo); + if (MO) + return; + } else { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() && + MO.getSubReg() == 0) + return; + } + } + addOperand(MachineOperand::CreateReg(IncomingReg, + true /*IsDef*/, + true /*IsImp*/)); +} + +void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, + const TargetRegisterInfo &TRI) { + bool HasRegMask = false; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (MO.isRegMask()) { + HasRegMask = true; + continue; + } + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + bool Dead = true; + for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + I != E; ++I) + if (TRI.regsOverlap(*I, Reg)) { + Dead = false; + break; + } + // If there are no uses, including partial uses, the def is dead. + if (Dead) MO.setIsDead(); + } + + // This is a call with a register mask operand. + // Mask clobbers are always dead, so add defs for the non-dead defines. + if (HasRegMask) + for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + I != E; ++I) + addRegisterDefined(*I, &TRI); +} + +unsigned +MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { + // Build up a buffer of hash code components. + SmallVector<size_t, 8> HashComponents; + HashComponents.reserve(MI->getNumOperands() + 1); + HashComponents.push_back(MI->getOpcode()); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. + + HashComponents.push_back(hash_value(MO)); + } + return hash_combine_range(HashComponents.begin(), HashComponents.end()); +} + +void MachineInstr::emitError(StringRef Msg) const { + // Find the source location cookie. 
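+  // Inline asm typically carries a !srcloc metadata operand whose first
+  // element is a ConstantInt cookie for the original source location; scan
+  // the operands backwards to find it.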
+ unsigned LocCookie = 0; + const MDNode *LocMD = 0; + for (unsigned i = getNumOperands(); i != 0; --i) { + if (getOperand(i-1).isMetadata() && + (LocMD = getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + } + + if (const MachineBasicBlock *MBB = getParent()) + if (const MachineFunction *MF = MBB->getParent()) + return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg); + report_fatal_error(Msg); +} diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp new file mode 100644 index 0000000..77bcd1d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -0,0 +1,330 @@ +//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +namespace { + class UnpackMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + UnpackMachineBundles() : MachineFunctionPass(ID) { + initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + }; +} // end anonymous namespace + +char UnpackMachineBundles::ID = 0; +char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID; +INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles", + "Unpack machine instruction bundles", false, false) + +bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(), + MIE = MBB->instr_end(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // Remove BUNDLE instruction and the InsideBundle flags from bundled + // instructions. 
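+      // A bundle is a BUNDLE header instruction chained to its members via
+      // the BundledPred/BundledSucc flags; unbundling clears those links.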
+ if (MI->isBundle()) { + while (++MII != MIE && MII->isBundledWithPred()) { + MII->unbundleFromPred(); + for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MII->getOperand(i); + if (MO.isReg() && MO.isInternalRead()) + MO.setIsInternalRead(false); + } + } + MI->eraseFromParent(); + + Changed = true; + continue; + } + + ++MII; + } + } + + return Changed; +} + + +namespace { + class FinalizeMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + FinalizeMachineBundles() : MachineFunctionPass(ID) { + initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + }; +} // end anonymous namespace + +char FinalizeMachineBundles::ID = 0; +char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID; +INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles", + "Finalize machine instruction bundles", false, false) + +bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) { + return llvm::finalizeBundles(MF); +} + + +/// finalizeBundle - Finalize a machine instruction bundle which includes +/// a sequence of instructions starting from FirstMI to LastMI (exclusive). +/// This routine adds a BUNDLE instruction to represent the bundle, it adds +/// IsInternalRead markers to MachineOperands which are defined inside the +/// bundle, and it copies externally visible defs and uses to the BUNDLE +/// instruction. +void llvm::finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI, + MachineBasicBlock::instr_iterator LastMI) { + assert(FirstMI != LastMI && "Empty bundle?"); + MIBundleBuilder Bundle(MBB, FirstMI, LastMI); + + const TargetMachine &TM = MBB.getParent()->getTarget(); + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(), + TII->get(TargetOpcode::BUNDLE)); + Bundle.prepend(MIB); + + SmallVector<unsigned, 32> LocalDefs; + SmallSet<unsigned, 32> LocalDefSet; + SmallSet<unsigned, 8> DeadDefSet; + SmallSet<unsigned, 16> KilledDefSet; + SmallVector<unsigned, 8> ExternUses; + SmallSet<unsigned, 8> ExternUseSet; + SmallSet<unsigned, 8> KilledUseSet; + SmallSet<unsigned, 8> UndefUseSet; + SmallVector<MachineOperand*, 4> Defs; + for (; FirstMI != LastMI; ++FirstMI) { + for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = FirstMI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isDef()) { + Defs.push_back(&MO); + continue; + } + + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (LocalDefSet.count(Reg)) { + MO.setIsInternalRead(); + if (MO.isKill()) + // Internal def is now killed. + KilledDefSet.insert(Reg); + } else { + if (ExternUseSet.insert(Reg)) { + ExternUses.push_back(Reg); + if (MO.isUndef()) + UndefUseSet.insert(Reg); + } + if (MO.isKill()) + // External def is now killed. + KilledUseSet.insert(Reg); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + MachineOperand &MO = *Defs[i]; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (LocalDefSet.insert(Reg)) { + LocalDefs.push_back(Reg); + if (MO.isDead()) { + DeadDefSet.insert(Reg); + } + } else { + // Re-defined inside the bundle, it's no longer killed. + KilledDefSet.erase(Reg); + if (!MO.isDead()) + // Previously defined but dead. 
+ DeadDefSet.erase(Reg); + } + + if (!MO.isDead()) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + if (LocalDefSet.insert(SubReg)) + LocalDefs.push_back(SubReg); + } + } + } + + Defs.clear(); + } + + SmallSet<unsigned, 32> Added; + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Reg = LocalDefs[i]; + if (Added.insert(Reg)) { + // If it's not live beyond end of the bundle, mark it dead. + bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg); + MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) | + getImplRegState(true)); + } + } + + for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) { + unsigned Reg = ExternUses[i]; + bool isKill = KilledUseSet.count(Reg); + bool isUndef = UndefUseSet.count(Reg); + MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) | + getImplRegState(true)); + } +} + +/// finalizeBundle - Same functionality as the previous finalizeBundle except +/// the last instruction in the bundle is not provided as an input. This is +/// used in cases where bundles are pre-determined by marking instructions +/// with 'InsideBundle' marker. It returns the MBB instruction iterator that +/// points to the end of the bundle. +MachineBasicBlock::instr_iterator +llvm::finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI) { + MachineBasicBlock::instr_iterator E = MBB.instr_end(); + MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI); + while (LastMI != E && LastMI->isInsideBundle()) + ++LastMI; + finalizeBundle(MBB, FirstMI, LastMI); + return LastMI; +} + +/// finalizeBundles - Finalize instruction bundles in the specified +/// MachineFunction. Return true if any bundles are finalized. +bool llvm::finalizeBundles(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &MBB = *I; + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(); + MachineBasicBlock::instr_iterator MIE = MBB.instr_end(); + if (MII == MIE) + continue; + assert(!MII->isInsideBundle() && + "First instr cannot be inside bundle before finalization!"); + + for (++MII; MII != MIE; ) { + if (!MII->isInsideBundle()) + ++MII; + else { + MII = finalizeBundle(MBB, llvm::prior(MII)); + Changed = true; + } + } + } + + return Changed; +} + +//===----------------------------------------------------------------------===// +// MachineOperand iterator +//===----------------------------------------------------------------------===// + +MachineOperandIteratorBase::VirtRegInfo +MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, + SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) { + VirtRegInfo RI = { false, false, false }; + for(; isValid(); ++*this) { + MachineOperand &MO = deref(); + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + // Remember each (MI, OpNo) that refers to Reg. + if (Ops) + Ops->push_back(std::make_pair(MO.getParent(), getOperandNo())); + + // Both defs and uses can read virtual registers. + if (MO.readsReg()) { + RI.Reads = true; + if (MO.isDef()) + RI.Tied = true; + } + + // Only defs can write. 
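+    // A use tied to a def means Reg is read-modify-written by this
+    // instruction, which is what the Tied flag reports to the caller.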
+ if (MO.isDef()) + RI.Writes = true; + else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo())) + RI.Tied = true; + } + return RI; +} + +MachineOperandIteratorBase::PhysRegInfo +MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, + const TargetRegisterInfo *TRI) { + bool AllDefsDead = true; + PhysRegInfo PRI = {false, false, false, false, false, false}; + + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "analyzePhysReg not given a physical register!"); + for (; isValid(); ++*this) { + MachineOperand &MO = deref(); + + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + PRI.Clobbers = true; // Regmask clobbers Reg. + + if (!MO.isReg()) + continue; + + unsigned MOReg = MO.getReg(); + if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) + continue; + + bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg); + bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg); + + if (IsRegOrSuperReg && MO.readsReg()) { + // Reg or a super-reg is read, and perhaps killed also. + PRI.Reads = true; + PRI.Kills = MO.isKill(); + } + + if (IsRegOrOverlapping && MO.readsReg()) { + PRI.ReadsOverlap = true;// Reg or an overlapping register is read. + } + + if (!MO.isDef()) + continue; + + if (IsRegOrSuperReg) { + PRI.Defines = true; // Reg or a super-register is defined. + if (!MO.isDead()) + AllDefsDead = false; + } + if (IsRegOrOverlapping) + PRI.Clobbers = true; // Reg or an overlapping reg is defined. + } + + if (AllDefsDead && PRI.Defines) + PRI.DefinesDead = true; // Reg or super-register was defined and was dead. + + return PRI; +} diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp new file mode 100644 index 0000000..ed3ed4d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -0,0 +1,1489 @@ +//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs loop invariant code motion on machine instructions. We +// attempt to remove as much code from the body of a loop as possible. +// +// This pass does not attempt to throttle itself to limit register pressure. +// The register allocation phases are expected to perform rematerialization +// to recover when register pressure is high. +// +// This pass is not intended to be a replacement or a complete alternative +// for the LLVM-IR-level LICM pass. It is only designed to hoist simple +// constructs that are not exposed before lowering and instruction selection. 
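+//
+// For example (purely illustrative), a loop-invariant immediate
+// materialization such as
+//
+//   loop:
+//     %vreg1 = MOV32ri 42   ; no side effects, operands loop-invariant
+//
+// is moved into the loop preheader so it executes only once.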
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-licm"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+AvoidSpeculation("avoid-speculation",
+                 cl::desc("MachineLICM should avoid speculation"),
+                 cl::init(true), cl::Hidden);
+
+STATISTIC(NumHoisted,
+          "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+          "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+          "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+          "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+          "Number of machine instructions hoisted out of loops post regalloc");
+
+namespace {
+  class MachineLICM : public MachineFunctionPass {
+    const TargetMachine *TM;
+    const TargetInstrInfo *TII;
+    const TargetLoweringBase *TLI;
+    const TargetRegisterInfo *TRI;
+    const MachineFrameInfo *MFI;
+    MachineRegisterInfo *MRI;
+    const InstrItineraryData *InstrItins;
+    bool PreRegAlloc;
+
+    // Various analyses that we use...
+    AliasAnalysis *AA;        // Alias analysis info.
+    MachineLoopInfo *MLI;     // Current MachineLoopInfo
+    MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+
+    // State that is updated as we process loops
+    bool Changed;                    // True if a loop is changed.
+    bool FirstInLoop;                // True if it's the first LICM in the loop.
+    MachineLoop *CurLoop;            // The current loop we are working on.
+    MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+    // Exit blocks for CurLoop.
+    SmallVector<MachineBasicBlock*, 8> ExitBlocks;
+
+    bool isExitBlock(const MachineBasicBlock *MBB) const {
+      return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
+        ExitBlocks.end();
+    }
+
+    // Track 'estimated' register pressure.
+    SmallSet<unsigned, 32> RegSeen;
+    SmallVector<unsigned, 8> RegPressure;
+
+    // Register pressure "limit" per register class. If the pressure
+    // is higher than the limit, then it's considered high.
+    SmallVector<unsigned, 8> RegLimit;
+
+    // Register pressure on path leading from loop preheader to current BB.
+    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
+    // For each opcode, keep a list of potential CSE instructions.
+    DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
+    enum {
+      SpeculateFalse = 0,
+      SpeculateTrue = 1,
+      SpeculateUnknown = 2
+    };
+
+    // If an MBB does not dominate loop exiting blocks then it may not be safe
+    // to hoist loads from this block.
+    // Tri-state: 0 - false, 1 - true, 2 - unknown
+    unsigned SpeculationState;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    MachineLICM() :
+      MachineFunctionPass(ID), PreRegAlloc(true) {
+      initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+    }
+
+    explicit MachineLICM(bool PreRA) :
+      MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+      initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineLoopInfo>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<AliasAnalysis>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreserved<MachineDominatorTree>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual void releaseMemory() {
+      RegSeen.clear();
+      RegPressure.clear();
+      RegLimit.clear();
+      BackTrace.clear();
+      for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
+             CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
+        CI->second.clear();
+      CSEMap.clear();
+    }
+
+  private:
+    /// CandidateInfo - Keep track of information about hoisting candidates.
+    struct CandidateInfo {
+      MachineInstr *MI;
+      unsigned Def;
+      int FI;
+      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+        : MI(mi), Def(def), FI(fi) {}
+    };
+
+    /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+    /// invariants out to the preheader.
+    void HoistRegionPostRA();
+
+    /// HoistPostRA - When an instruction is found to use only loop-invariant
+    /// operands and is safe to hoist, this function is called to do the
+    /// dirty work.
+    void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+    /// ProcessMI - Examine the instruction for a potential LICM candidate.
+    /// Also gather register def and frame object update information.
+    void ProcessMI(MachineInstr *MI,
+                   BitVector &PhysRegDefs,
+                   BitVector &PhysRegClobbers,
+                   SmallSet<int, 32> &StoredFIs,
+                   SmallVector<CandidateInfo, 32> &Candidates);
+
+    /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
+    /// current loop.
+    void AddToLiveIns(unsigned Reg);
+
+    /// IsLICMCandidate - Returns true if the instruction may be a suitable
+    /// candidate for LICM. E.g., if the instruction is a call, then it's
+    /// obviously not safe to hoist it.
+    bool IsLICMCandidate(MachineInstr &I);
+
+    /// IsLoopInvariantInst - Returns true if the instruction is loop
+    /// invariant. I.e., all virtual register operands are defined outside of
+    /// the loop, physical registers aren't accessed (explicitly or implicitly),
+    /// and the instruction is hoistable.
+    ///
+    bool IsLoopInvariantInst(MachineInstr &I);
+
+    /// HasLoopPHIUse - Return true if the specified instruction is used by any
+    /// phi node in the current loop.
+    bool HasLoopPHIUse(const MachineInstr *MI) const;
+
+    /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+    /// and a use in the current loop; return true if the target considers
+    /// it 'high'.
+    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+                               unsigned Reg) const;
+
+    bool IsCheapInstruction(MachineInstr &MI) const;
+
+    /// CanCauseHighRegPressure - Visit BBs from header to current BB,
+    /// check if hoisting an instruction of the given cost matrix can cause high
+    /// register pressure.
+    bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap);
+
+    /// UpdateBackTraceRegPressure - Traverse the back trace from header to
+    /// the current block and update their register pressures to reflect the
+    /// effect of hoisting MI from the current block to the preheader.
+    void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
+    /// IsProfitableToHoist - Return true if it is potentially profitable to
+    /// hoist the given loop invariant.
+    bool IsProfitableToHoist(MachineInstr &MI);
+
+    /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
+    /// If not then a load from this mbb may not be safe to hoist.
+    bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+
+    void EnterScope(MachineBasicBlock *MBB);
+
+    void ExitScope(MachineBasicBlock *MBB);
+
+    /// ExitScopeIfDone - Destroy the scope for the MBB that corresponds to the
+    /// given dominator tree node if it's a leaf or all of its children are
+    /// done. Walk up the dominator tree to destroy ancestors which are now
+    /// done.
+    void ExitScopeIfDone(MachineDomTreeNode *Node,
+                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+                         DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+
+    /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+    /// blocks dominated by the specified header block, and that are in the
+    /// current loop) in depth first order w.r.t. the DominatorTree. This allows
+    /// us to visit definitions before uses, allowing us to hoist a loop body in
+    /// one pass without iteration.
+    ///
+    void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
+    void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
+
+    /// getRegisterClassIDAndCost - For a given MI, register, and the operand
+    /// index, return the ID and cost of its representative register class by
+    /// reference.
+    void getRegisterClassIDAndCost(const MachineInstr *MI,
+                                   unsigned Reg, unsigned OpIdx,
+                                   unsigned &RCId, unsigned &RCCost) const;
+
+    /// InitRegPressure - Find all virtual register references that are liveout
+    /// of the preheader to initialize the starting "register pressure". Note
+    /// this does not count live through (livein but not used) registers.
+    void InitRegPressure(MachineBasicBlock *BB);
+
+    /// UpdateRegPressure - Update estimate of register pressure after the
+    /// specified instruction.
+    void UpdateRegPressure(const MachineInstr *MI);
+
+    /// ExtractHoistableLoad - Unfold a load from the given MachineInstr if
+    /// the load itself could be hoisted. Return the unfolded and hoistable
+    /// load, or null if the load couldn't be unfolded or if it wouldn't
+    /// be hoistable.
+    MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
+    /// LookForDuplicate - Find an instruction among PrevMIs that is a
+    /// duplicate of MI. Return this instruction if it's found.
+    const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+                                     std::vector<const MachineInstr*> &PrevMIs);
+
+    /// EliminateCSE - Given a LICM'ed instruction, look for an instruction in
+    /// the preheader that computes the same value. If one is found, replace
+    /// all uses of MI with the definition of the existing instruction rather
+    /// than hoisting MI to the preheader.
+    bool EliminateCSE(MachineInstr *MI,
+           DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+
+    /// MayCSE - Return true if the given instruction will be CSE'd if it's
+    /// hoisted out of the loop.
+    bool MayCSE(MachineInstr *MI);
+
+    /// Hoist - When an instruction is found to use only loop invariant
+    /// operands that are safe to hoist, this function is called to do the
+    /// dirty work. It returns true if the instruction is hoisted.
+    bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
+
+    /// InitCSEMap - Initialize the CSE map with instructions that are in the
+    /// current loop preheader that may become duplicates of instructions that
+    /// are hoisted out of the loop.
+    void InitCSEMap(MachineBasicBlock *BB);
+
+    /// getCurPreheader - Get the preheader for the current loop, splitting
+    /// a critical edge if needed.
+    MachineBasicBlock *getCurPreheader();
+  };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+char &llvm::MachineLICMID = MachineLICM::ID;
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+                "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+                "Machine Loop Invariant Code Motion", false, false)
+
+/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outermost
+/// loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+  // Check whether this loop even has a unique predecessor.
+  if (!CurLoop->getLoopPredecessor())
+    return false;
+  // Ok, now check to see if any of its outer loops do.
+  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+    if (L->getLoopPredecessor())
+      return false;
+  // None of them did, so this is the outermost with a unique predecessor.
+  return true;
+}
+
+bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+  Changed = FirstInLoop = false;
+  TM = &MF.getTarget();
+  TII = TM->getInstrInfo();
+  TLI = TM->getTargetLowering();
+  TRI = TM->getRegisterInfo();
+  MFI = MF.getFrameInfo();
+  MRI = &MF.getRegInfo();
+  InstrItins = TM->getInstrItineraryData();
+
+  PreRegAlloc = MRI->isSSA();
+
+  if (PreRegAlloc)
+    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+  else
+    DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+  DEBUG(dbgs() << MF.getName() << " ********\n");
+
+  if (PreRegAlloc) {
+    // Estimate register pressure during pre-regalloc pass.
+    unsigned NumRC = TRI->getNumRegClasses();
+    RegPressure.resize(NumRC);
+    std::fill(RegPressure.begin(), RegPressure.end(), 0);
+    RegLimit.resize(NumRC);
+    for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+         E = TRI->regclass_end(); I != E; ++I)
+      RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
+  }
+
+  // Get our Loop information...
+  MLI = &getAnalysis<MachineLoopInfo>();
+  DT  = &getAnalysis<MachineDominatorTree>();
+  AA  = &getAnalysis<AliasAnalysis>();
+
+  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+  while (!Worklist.empty()) {
+    CurLoop = Worklist.pop_back_val();
+    CurPreheader = 0;
+    ExitBlocks.clear();
+
+    // If this is done before regalloc, only visit the outermost loops that
+    // have a unique predecessor; inner loops are re-queued and visited on
+    // their own only when the outer loop has no such predecessor.
+    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+      Worklist.append(CurLoop->begin(), CurLoop->end());
+      continue;
+    }
+
+    CurLoop->getExitBlocks(ExitBlocks);
+
+    if (!PreRegAlloc)
+      HoistRegionPostRA();
+    else {
+      // CSEMap is initialized for the loop header when the first instruction
+      // is being hoisted.
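+      // Shape of the pre-RA walk (hypothetical nest L1 { L2 }): HoistOutOfLoop
+      // below visits the dominator subtree under L1's header exactly once, so
+      // an invariant in L2 whose operands were hoisted earlier in the same
+      // walk is itself hoistable in this one pass, with no iteration.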
+      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+      FirstInLoop = true;
+      HoistOutOfLoop(N);
+      CSEMap.clear();
+    }
+  }
+
+  return Changed;
+}
+
+/// InstructionStoresToFI - Return true if the instruction stores to the
+/// specified frame index.
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+         oe = MI->memoperands_end(); o != oe; ++o) {
+    if (!(*o)->isStore() || !(*o)->getValue())
+      continue;
+    if (const FixedStackPseudoSourceValue *Value =
+        dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+      if (Value->getFrameIndex() == FI)
+        return true;
+    }
+  }
+  return false;
+}
+
+/// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+                            BitVector &PhysRegDefs,
+                            BitVector &PhysRegClobbers,
+                            SmallSet<int, 32> &StoredFIs,
+                            SmallVector<CandidateInfo, 32> &Candidates) {
+  bool RuledOut = false;
+  bool HasNonInvariantUse = false;
+  unsigned Def = 0;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isFI()) {
+      // Remember if the instruction stores to the frame index.
+      int FI = MO.getIndex();
+      if (!StoredFIs.count(FI) &&
+          MFI->isSpillSlotObjectIndex(FI) &&
+          InstructionStoresToFI(MI, FI))
+        StoredFIs.insert(FI);
+      HasNonInvariantUse = true;
+      continue;
+    }
+
+    // We can't hoist an instruction defining a physreg that is clobbered in
+    // the loop.
+    if (MO.isRegMask()) {
+      PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
+      continue;
+    }
+
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+           "Not expecting virtual register!");
+
+    if (!MO.isDef()) {
+      if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
+        // If it's using a non-loop-invariant register, then it's obviously
+        // not safe to hoist.
+        HasNonInvariantUse = true;
+      continue;
+    }
+
+    if (MO.isImplicit()) {
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        PhysRegClobbers.set(*AI);
+      if (!MO.isDead())
+        // Non-dead implicit def? This cannot be hoisted.
+        RuledOut = true;
+      // No need to check if a dead implicit def is also defined by
+      // another instruction.
+      continue;
+    }
+
+    // FIXME: For now, avoid instructions with multiple defs, unless
+    // it's a dead implicit def.
+    if (Def)
+      RuledOut = true;
+    else
+      Def = Reg;
+
+    // If we have already seen another instruction that defines the same
+    // register, then this is not safe. Two defs are indicated by setting a
+    // PhysRegClobbers bit.
+    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
+      if (PhysRegDefs.test(*AS))
+        PhysRegClobbers.set(*AS);
+      if (PhysRegClobbers.test(*AS))
+        // The register defined by MI is also defined by another instruction
+        // in the loop, so MI cannot be a LICM candidate.
+        RuledOut = true;
+      PhysRegDefs.set(*AS);
+    }
+  }
+
+  // Only consider reloads for now and remats which do not have register
+  // operands. FIXME: Consider unfold load folding instructions.
+  if (Def && !RuledOut) {
+    int FI = INT_MIN;
+    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+      Candidates.push_back(CandidateInfo(MI, Def, FI));
+  }
+}
+
+/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+/// invariants out to the preheader.
+void MachineLICM::HoistRegionPostRA() {
+  MachineBasicBlock *Preheader = getCurPreheader();
+  if (!Preheader)
+    return;
+
+  unsigned NumRegs = TRI->getNumRegs();
+  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
+  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
+
+  SmallVector<CandidateInfo, 32> Candidates;
+  SmallSet<int, 32> StoredFIs;
+
+  // Walk the entire region, count number of defs for each register, and
+  // collect potential LICM candidates.
+  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *BB = Blocks[i];
+
+    // If the header of the loop containing this basic block is a landing pad,
+    // then don't try to hoist instructions out of this loop.
+    const MachineLoop *ML = MLI->getLoopFor(BB);
+    if (ML && ML->getHeader()->isLandingPad()) continue;
+
+    // Conservatively treat live-ins as external defs.
+    // FIXME: That means a reload that is reused in successor blocks will not
+    // be LICM'ed.
+    for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+           E = BB->livein_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        PhysRegDefs.set(*AI);
+    }
+
+    SpeculationState = SpeculateUnknown;
+    for (MachineBasicBlock::iterator
+           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+      MachineInstr *MI = &*MII;
+      ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
+    }
+  }
+
+  // Gather the registers read / clobbered by the terminator.
+  BitVector TermRegs(NumRegs);
+  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
+  if (TI != Preheader->end()) {
+    for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = TI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        TermRegs.set(*AI);
+    }
+  }
+
+  // Now evaluate whether the potential candidates qualify.
+  // 1. Check if the candidate defined register is defined by another
+  //    instruction in the loop.
+  // 2. If the candidate is a load from stack slot (always true for now),
+  //    check if the slot is stored anywhere in the loop.
+  // 3. Make sure the candidate's def does not clobber registers read by
+  //    the terminator, and is not itself clobbered by the terminator.
+  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
+    if (Candidates[i].FI != INT_MIN &&
+        StoredFIs.count(Candidates[i].FI))
+      continue;
+
+    unsigned Def = Candidates[i].Def;
+    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
+      bool Safe = true;
+      MachineInstr *MI = Candidates[i].MI;
+      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+        const MachineOperand &MO = MI->getOperand(j);
+        if (!MO.isReg() || MO.isDef() || !MO.getReg())
+          continue;
+        unsigned Reg = MO.getReg();
+        if (PhysRegDefs.test(Reg) ||
+            PhysRegClobbers.test(Reg)) {
+          // If it's using a non-loop-invariant register, then it's obviously
+          // not safe to hoist.
+          Safe = false;
+          break;
+        }
+      }
+      if (Safe)
+        HoistPostRA(MI, Candidates[i].Def);
+    }
+  }
+}
+
+/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
+/// loop, and make sure it is not killed by any instructions in the loop.
+void MachineLICM::AddToLiveIns(unsigned Reg) {
+  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *BB = Blocks[i];
+    if (!BB->isLiveIn(Reg))
+      BB->addLiveIn(Reg);
+    for (MachineBasicBlock::iterator
+           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+      MachineInstr *MI = &*MII;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+        if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+          MO.setIsKill(false);
+      }
+    }
+  }
+}
+
+/// HoistPostRA - When an instruction is found to use only loop invariant
+/// operands that are safe to hoist, this function is called to do the
+/// dirty work.
+void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+  MachineBasicBlock *Preheader = getCurPreheader();
+
+  // Now move the instruction to the preheader, inserting it before any
+  // terminator instructions.
+  DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
+               << MI->getParent()->getNumber() << ": " << *MI);
+
+  // Splice the instruction to the preheader.
+  MachineBasicBlock *MBB = MI->getParent();
+  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
+
+  // Add the register to the livein list of all the BBs in the current loop
+  // since a loop invariant must be kept live throughout the whole loop. This
+  // is important to ensure later passes do not scavenge the def register.
+  AddToLiveIns(Def);
+
+  ++NumPostRAHoisted;
+  Changed = true;
+}
+
+// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
+// If not then a load from this mbb may not be safe to hoist.
+bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+  if (SpeculationState != SpeculateUnknown)
+    return SpeculationState == SpeculateFalse;
+
+  if (BB != CurLoop->getHeader()) {
+    // Check loop exiting blocks.
+    SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
+    CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
+    for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
+      if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
+        SpeculationState = SpeculateTrue;
+        return false;
+      }
+  }
+
+  SpeculationState = SpeculateFalse;
+  return true;
+}
+
+void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+
+  // Remember livein register pressure.
+  BackTrace.push_back(RegPressure);
+}
+
+void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+  BackTrace.pop_back();
+}
+
+/// ExitScopeIfDone - Destroy the scope for the MBB that corresponds to the
+/// given dominator tree node if it's a leaf or all of its children are done.
+/// Walk up the dominator tree to destroy ancestors which are now done.
+void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
+                DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+                DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+  if (OpenChildren[Node])
+    return;
+
+  // Pop scope.
+  ExitScope(Node->getBlock());
+
+  // Now traverse upwards to pop ancestors whose children are all done.
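+  // Worked example (hypothetical dominator tree): if Node's parent P started
+  // with OpenChildren[P] == 2, finishing Node drops the count to 1 and the
+  // walk below stops, leaving P's scope (and its register-pressure snapshot
+  // on BackTrace) open; finishing P's last child drops the count to 0, so
+  // P's scope is popped too and the walk continues toward the root.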
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) { + unsigned Left = --OpenChildren[Parent]; + if (Left != 0) + break; + ExitScope(Parent->getBlock()); + Node = Parent; + } +} + +/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all +/// blocks dominated by the specified header block, and that are in the +/// current loop) in depth first order w.r.t the DominatorTree. This allows +/// us to visit definitions before uses, allowing us to hoist a loop body in +/// one pass without iteration. +/// +void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { + SmallVector<MachineDomTreeNode*, 32> Scopes; + SmallVector<MachineDomTreeNode*, 8> WorkList; + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; + DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; + + // Perform a DFS walk to determine the order of visit. + WorkList.push_back(HeaderN); + do { + MachineDomTreeNode *Node = WorkList.pop_back_val(); + assert(Node != 0 && "Null dominator tree node?"); + MachineBasicBlock *BB = Node->getBlock(); + + // If the header of the loop containing this basic block is a landing pad, + // then don't try to hoist instructions out of this loop. + const MachineLoop *ML = MLI->getLoopFor(BB); + if (ML && ML->getHeader()->isLandingPad()) + continue; + + // If this subregion is not in the top level loop at all, exit. + if (!CurLoop->contains(BB)) + continue; + + Scopes.push_back(Node); + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + unsigned NumChildren = Children.size(); + + // Don't hoist things out of a large switch statement. This often causes + // code to be hoisted that wasn't going to be executed, and increases + // register pressure in a situation where it's likely to matter. + if (BB->succ_size() >= 25) + NumChildren = 0; + + OpenChildren[Node] = NumChildren; + // Add children in reverse order as then the next popped worklist node is + // the first child of this node. This means we ultimately traverse the + // DOM tree in exactly the same order as if we'd recursed. + for (int i = (int)NumChildren-1; i >= 0; --i) { + MachineDomTreeNode *Child = Children[i]; + ParentMap[Child] = Node; + WorkList.push_back(Child); + } + } while (!WorkList.empty()); + + if (Scopes.size() != 0) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + + // Compute registers which are livein into the loop headers. + RegSeen.clear(); + BackTrace.clear(); + InitRegPressure(Preheader); + } + + // Now perform LICM. + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { + MachineDomTreeNode *Node = Scopes[i]; + MachineBasicBlock *MBB = Node->getBlock(); + + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + continue; + + EnterScope(MBB); + + // Process the block + SpeculationState = SpeculateUnknown; + for (MachineBasicBlock::iterator + MII = MBB->begin(), E = MBB->end(); MII != E; ) { + MachineBasicBlock::iterator NextMII = MII; ++NextMII; + MachineInstr *MI = &*MII; + if (!Hoist(MI, Preheader)) + UpdateRegPressure(MI); + MII = NextMII; + } + + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. + ExitScopeIfDone(Node, OpenChildren, ParentMap); + } +} + +static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { + return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); +} + +/// getRegisterClassIDAndCost - For a given MI, register, and the operand +/// index, return the ID and cost of its representative register class. 
+void +MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, + unsigned Reg, unsigned OpIdx, + unsigned &RCId, unsigned &RCCost) const { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + MVT VT = *RC->vt_begin(); + if (VT == MVT::Untyped) { + RCId = RC->getID(); + RCCost = 1; + } else { + RCId = TLI->getRepRegClassFor(VT)->getID(); + RCCost = TLI->getRepRegClassCostFor(VT); + } +} + +/// InitRegPressure - Find all virtual register references that are liveout of +/// the preheader to initialize the starting "register pressure". Note this +/// does not count live through (livein but not used) registers. +void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { + std::fill(RegPressure.begin(), RegPressure.end(), 0); + + // If the preheader has only a single predecessor and it ends with a + // fallthrough or an unconditional branch, then scan its predecessor for live + // defs as well. This happens whenever the preheader is created by splitting + // the critical edge from the loop predecessor to the loop header. + if (BB->pred_size() == 1) { + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) + InitRegPressure(*BB->pred_begin()); + } + + for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); + MII != E; ++MII) { + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + bool isNew = RegSeen.insert(Reg); + unsigned RCId, RCCost; + getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); + if (MO.isDef()) + RegPressure[RCId] += RCCost; + else { + bool isKill = isOperandKill(MO, MRI); + if (isNew && !isKill) + // Haven't seen this, it must be a livein. + RegPressure[RCId] += RCCost; + else if (!isNew && isKill) + RegPressure[RCId] -= RCCost; + } + } + } +} + +/// UpdateRegPressure - Update estimate of register pressure after the +/// specified instruction. +void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { + if (MI->isImplicitDef()) + return; + + SmallVector<unsigned, 4> Defs; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + bool isNew = RegSeen.insert(Reg); + if (MO.isDef()) + Defs.push_back(Reg); + else if (!isNew && isOperandKill(MO, MRI)) { + unsigned RCId, RCCost; + getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); + if (RCCost > RegPressure[RCId]) + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= RCCost; + } + } + + unsigned Idx = 0; + while (!Defs.empty()) { + unsigned Reg = Defs.pop_back_val(); + unsigned RCId, RCCost; + getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); + RegPressure[RCId] += RCCost; + ++Idx; + } +} + +/// isLoadFromGOTOrConstantPool - Return true if this machine instruction +/// loads from global offset table or constant pool. 
+static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
+  assert(MI.mayLoad() && "Expected MI that loads!");
+  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
+         E = MI.memoperands_end(); I != E; ++I) {
+    if (const Value *V = (*I)->getValue()) {
+      if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+        if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
+          return true;
+    }
+  }
+  return false;
+}
+
+/// IsLICMCandidate - Returns true if the instruction may be a suitable
+/// candidate for LICM. E.g., if the instruction is a call, then it's obviously
+/// not safe to hoist it.
+bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+  // Check if it's safe to move the instruction.
+  bool DontMoveAcrossStore = true;
+  if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
+    return false;
+
+  // If it is a load, check that it is guaranteed to execute by making sure
+  // that it dominates all exiting blocks. If it doesn't, then there is a path
+  // out of the loop which does not execute this load, so we can't hoist it.
+  // Loads from constant memory are not safe to speculate all the time, for
+  // example an indexed load from a jump table.
+  // Stores and side effects are already checked by isSafeToMove.
+  if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
+      !IsGuaranteedToExecute(I.getParent()))
+    return false;
+
+  return true;
+}
+
+/// IsLoopInvariantInst - Returns true if the instruction is loop
+/// invariant. I.e., all virtual register operands are defined outside of the
+/// loop, physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+  if (!IsLICMCandidate(I))
+    return false;
+
+  // The instruction is loop invariant if all of its operands are.
+  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = I.getOperand(i);
+
+    if (!MO.isReg())
+      continue;
+
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+
+    // Don't hoist an instruction that uses or defines a physical register.
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      if (MO.isUse()) {
+        // If the physreg has no defs anywhere, it's just an ambient register
+        // and we can freely move its uses. Alternatively, if it's allocatable,
+        // it could get allocated to something with a def during allocation.
+        if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
+          return false;
+        // Otherwise it's safe to move.
+        continue;
+      } else if (!MO.isDead()) {
+        // A def that isn't dead. We can't move it.
+        return false;
+      } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
+        // If the reg is live into the loop, we can't hoist an instruction
+        // which would clobber it.
+        return false;
+      }
+    }
+
+    if (!MO.isUse())
+      continue;
+
+    assert(MRI->getVRegDef(Reg) &&
+           "Machine instr not mapped for this vreg?!");
+
+    // If the loop contains the definition of an operand, then the instruction
+    // isn't loop invariant.
+    if (CurLoop->contains(MRI->getVRegDef(Reg)))
+      return false;
+  }
+
+  // If we got this far, the instruction is loop invariant!
+  return true;
+}
+
+
+/// HasLoopPHIUse - Return true if the specified instruction is used by a
+/// phi node and hoisting it could cause a copy to be inserted.
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
+  SmallVector<const MachineInstr*, 8> Work(1, MI);
+  do {
+    MI = Work.pop_back_val();
+    for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+      if (!MO->isReg() || !MO->isDef())
+        continue;
+      unsigned Reg = MO->getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+           UE = MRI->use_end(); UI != UE; ++UI) {
+        MachineInstr *UseMI = &*UI;
+        // A PHI may cause a copy to be inserted.
+        if (UseMI->isPHI()) {
+          // A PHI inside the loop causes a copy because the live range of Reg
+          // is extended across the PHI.
+          if (CurLoop->contains(UseMI))
+            return true;
+          // A PHI in an exit block can cause a copy to be inserted if the PHI
+          // has multiple predecessors in the loop with different values.
+          // For now, approximate by rejecting all exit blocks.
+          if (isExitBlock(UseMI->getParent()))
+            return true;
+          continue;
+        }
+        // Look past copies as well.
+        if (UseMI->isCopy() && CurLoop->contains(UseMI))
+          Work.push_back(UseMI);
+      }
+    }
+  } while (!Work.empty());
+  return false;
+}
+
+/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+/// and a use in the current loop; return true if the target considers
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+                                        unsigned DefIdx, unsigned Reg) const {
+  if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
+    return false;
+
+  for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+         E = MRI->use_nodbg_end(); I != E; ++I) {
+    MachineInstr *UseMI = &*I;
+    if (UseMI->isCopyLike())
+      continue;
+    if (!CurLoop->contains(UseMI->getParent()))
+      continue;
+    for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = UseMI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse())
+        continue;
+      unsigned MOReg = MO.getReg();
+      if (MOReg != Reg)
+        continue;
+
+      if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+        return true;
+    }
+
+    // Only look at the first in loop use.
+    break;
+  }
+
+  return false;
+}
+
+/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
+/// the operand latency between its def and a use is one or less.
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+  if (MI.isAsCheapAsAMove() || MI.isCopyLike())
+    return true;
+  if (!InstrItins || InstrItins->isEmpty())
+    return false;
+
+  bool isCheap = false;
+  unsigned NumDefs = MI.getDesc().getNumDefs();
+  for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+    MachineOperand &DefMO = MI.getOperand(i);
+    if (!DefMO.isReg() || !DefMO.isDef())
+      continue;
+    --NumDefs;
+    unsigned Reg = DefMO.getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+
+    if (!TII->hasLowDefLatency(InstrItins, &MI, i))
+      return false;
+    isCheap = true;
+  }
+
+  return isCheap;
+}
+
+/// CanCauseHighRegPressure - Visit BBs from header to current BB, and check
+/// whether hoisting an instruction with the given cost can cause high
+/// register pressure.
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
+                                          bool CheapInstr) {
+  for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+       CI != CE; ++CI) {
+    if (CI->second <= 0)
+      continue;
+
+    unsigned RCId = CI->first;
+    unsigned Limit = RegLimit[RCId];
+    int Cost = CI->second;
+
+    // Don't hoist cheap instructions if they would increase register
+    // pressure, even if we're under the limit.
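+    // Worked example (hypothetical class and numbers): suppose this entry is
+    // { GR32 -> +1 }. For a cheap (copy-like) instruction the early return
+    // below rejects hoisting outright; for anything else, hoisting is
+    // rejected only if some block on the back trace already satisfies
+    // RP[GR32] + 1 >= RegLimit[GR32].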
+    if (CheapInstr)
+      return true;
+
+    for (unsigned i = BackTrace.size(); i != 0; --i) {
+      SmallVector<unsigned, 8> &RP = BackTrace[i-1];
+      if (RP[RCId] + Cost >= Limit)
+        return true;
+    }
+  }
+
+  return false;
+}
+
+/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
+/// current block and update their register pressures to reflect the effect
+/// of hoisting MI from the current block to the preheader.
+void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+  if (MI->isImplicitDef())
+    return;
+
+  // First compute the 'cost' of the instruction, i.e. its contribution
+  // to register pressure.
+  DenseMap<unsigned, int> Cost;
+  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.isImplicit())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+
+    unsigned RCId, RCCost;
+    getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
+    if (MO.isDef()) {
+      DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+      if (CI != Cost.end())
+        CI->second += RCCost;
+      else
+        Cost.insert(std::make_pair(RCId, RCCost));
+    } else if (isOperandKill(MO, MRI)) {
+      DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+      if (CI != Cost.end())
+        CI->second -= RCCost;
+      else
+        Cost.insert(std::make_pair(RCId, -RCCost));
+    }
+  }
+
+  // Update register pressure of blocks from loop header to current block.
+  for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
+    SmallVector<unsigned, 8> &RP = BackTrace[i];
+    for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+         CI != CE; ++CI) {
+      unsigned RCId = CI->first;
+      RP[RCId] += CI->second;
+    }
+  }
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+  if (MI.isImplicitDef())
+    return true;
+
+  // Besides removing computation from the loop, hoisting an instruction has
+  // these effects:
+  //
+  // - The value defined by the instruction becomes live across the entire
+  //   loop. This increases register pressure in the loop.
+  //
+  // - If the value is used by a PHI in the loop, a copy will be required for
+  //   lowering the PHI after extending the live range.
+  //
+  // - When hoisting the last use of a value in the loop, that value no longer
+  //   needs to be live in the loop. This lowers register pressure in the loop.
+
+  bool CheapInstr = IsCheapInstruction(MI);
+  bool CreatesCopy = HasLoopPHIUse(&MI);
+
+  // Don't hoist a cheap instruction if it would create a copy in the loop.
+  if (CheapInstr && CreatesCopy) {
+    DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+    return false;
+  }
+
+  // Rematerializable instructions should always be hoisted since the register
+  // allocator can just pull them down again when needed.
+  if (TII->isTriviallyReMaterializable(&MI, AA))
+    return true;
+
+  // Estimate register pressure to determine whether to LICM the instruction.
+  // In low register pressure situations, we can be more aggressive about
+  // hoisting. Also, favor hoisting long-latency instructions even in
+  // moderately high-pressure situations.
+  // Cheap instructions will only be hoisted if they don't increase register
+  // pressure at all.
+  // FIXME: If there are long latency loop-invariant instructions inside the
+  // loop at this point, why didn't the optimizer's LICM hoist them?
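+  // Worked example (hypothetical vregs, one class of cost 1): for
+  // "%vreg2 = ADD %vreg0, %vreg1<kill>", the def of %vreg2 contributes +1
+  // and the killed use of %vreg1 contributes -1, so the net Cost entry is 0
+  // and hoisting the instruction is register-pressure neutral.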
+  DenseMap<unsigned, int> Cost;
+  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.isImplicit())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+
+    unsigned RCId, RCCost;
+    getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
+    if (MO.isDef()) {
+      if (HasHighOperandLatency(MI, i, Reg)) {
+        DEBUG(dbgs() << "Hoist High Latency: " << MI);
+        ++NumHighLatency;
+        return true;
+      }
+      Cost[RCId] += RCCost;
+    } else if (isOperandKill(MO, MRI)) {
+      // If a virtual register use is a kill, hoisting it out of the loop
+      // may actually reduce register pressure or be register-pressure
+      // neutral.
+      Cost[RCId] -= RCCost;
+    }
+  }
+
+  // Visit BBs from header to current BB; if hoisting this doesn't cause
+  // high register pressure, then it's safe to proceed.
+  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
+    DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+    ++NumLowRP;
+    return true;
+  }
+
+  // Don't risk increasing register pressure if it would create copies.
+  if (CreatesCopy) {
+    DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+    return false;
+  }
+
+  // Do not "speculate" in high register pressure situations. If an
+  // instruction is not guaranteed to be executed in the loop, it's best to be
+  // conservative.
+  if (AvoidSpeculation &&
+      (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
+    DEBUG(dbgs() << "Won't speculate: " << MI);
+    return false;
+  }
+
+  // In a high register pressure situation, only hoist if the instruction is
+  // going to be remat'ed.
+  if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+      !MI.isInvariantLoad(AA)) {
+    DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
+    return false;
+  }
+
+  return true;
+}
+
+MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+  // Don't unfold simple loads.
+  if (MI->canFoldAsLoad())
+    return 0;
+
+  // If not, we may be able to unfold a load and hoist that.
+  // First test whether the instruction is loading from an amenable
+  // memory location.
+  if (!MI->isInvariantLoad(AA))
+    return 0;
+
+  // Next determine the register class for a temporary register.
+  unsigned LoadRegIndex;
+  unsigned NewOpc =
+    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+                                    /*UnfoldLoad=*/true,
+                                    /*UnfoldStore=*/false,
+                                    &LoadRegIndex);
+  if (NewOpc == 0) return 0;
+  const MCInstrDesc &MID = TII->get(NewOpc);
+  if (MID.getNumDefs() != 1) return 0;
+  MachineFunction &MF = *MI->getParent()->getParent();
+  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
+  // Ok, we're unfolding. Create a temporary register and do the unfold.
+  unsigned Reg = MRI->createVirtualRegister(RC);
+
+  SmallVector<MachineInstr *, 2> NewMIs;
+  bool Success =
+    TII->unfoldMemoryOperand(MF, MI, Reg,
+                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+                             NewMIs);
+  (void)Success;
+  assert(Success &&
+         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+         "succeeded!");
+  assert(NewMIs.size() == 2 &&
+         "Unfolded a load into multiple instructions!");
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineBasicBlock::iterator Pos = MI;
+  MBB->insert(Pos, NewMIs[0]);
+  MBB->insert(Pos, NewMIs[1]);
+  // If unfolding produced a load that wasn't loop-invariant or profitable to
+  // hoist, discard the new instructions and bail.
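+  // Illustrative unfold (hypothetical x86-style opcodes): a folded
+  //   %vreg1 = ADD32rm %vreg0, <constant-pool entry>
+  // is split into
+  //   %vreg2 = MOV32rm <constant-pool entry>    (NewMIs[0], hoistable load)
+  //   %vreg1 = ADD32rr %vreg0, %vreg2           (NewMIs[1], stays in place)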
+ if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { + NewMIs[0]->eraseFromParent(); + NewMIs[1]->eraseFromParent(); + return 0; + } + + // Update register pressure for the unfolded instruction. + UpdateRegPressure(NewMIs[1]); + + // Otherwise we successfully unfolded a load that we can hoist. + MI->eraseFromParent(); + return NewMIs[0]; +} + +void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { + for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { + const MachineInstr *MI = &*I; + unsigned Opcode = MI->getOpcode(); + DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator + CI = CSEMap.find(Opcode); + if (CI != CSEMap.end()) + CI->second.push_back(MI); + else { + std::vector<const MachineInstr*> CSEMIs; + CSEMIs.push_back(MI); + CSEMap.insert(std::make_pair(Opcode, CSEMIs)); + } + } +} + +const MachineInstr* +MachineLICM::LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr*> &PrevMIs) { + for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { + const MachineInstr *PrevMI = PrevMIs[i]; + if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0))) + return PrevMI; + } + return 0; +} + +bool MachineLICM::EliminateCSE(MachineInstr *MI, + DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { + // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate + // the undef property onto uses. + if (CI == CSEMap.end() || MI->isImplicitDef()) + return false; + + if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { + DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); + + // Replace virtual registers defined by MI by their counterparts defined + // by Dup. + SmallVector<unsigned, 2> Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + // Physical registers may not differ here. + assert((!MO.isReg() || MO.getReg() == 0 || + !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + MO.getReg() == Dup->getOperand(i).getReg()) && + "Instructions with different phys regs are not identical!"); + + if (MO.isReg() && MO.isDef() && + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + Defs.push_back(i); + } + + SmallVector<const TargetRegisterClass*, 2> OrigRCs; + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Idx = Defs[i]; + unsigned Reg = MI->getOperand(Idx).getReg(); + unsigned DupReg = Dup->getOperand(Idx).getReg(); + OrigRCs.push_back(MRI->getRegClass(DupReg)); + + if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { + // Restore old RCs if more than one defs. + for (unsigned j = 0; j != i; ++j) + MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); + return false; + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Idx = Defs[i]; + unsigned Reg = MI->getOperand(Idx).getReg(); + unsigned DupReg = Dup->getOperand(Idx).getReg(); + MRI->replaceRegWith(Reg, DupReg); + MRI->clearKillFlags(DupReg); + } + + MI->eraseFromParent(); + ++NumCSEed; + return true; + } + return false; +} + +/// MayCSE - Return true if the given instruction will be CSE'd if it's +/// hoisted out of the loop. +bool MachineLICM::MayCSE(MachineInstr *MI) { + unsigned Opcode = MI->getOpcode(); + DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator + CI = CSEMap.find(Opcode); + // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate + // the undef property onto uses. 
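+  // Lookup sketch (hypothetical entries): CSEMap is keyed by opcode, e.g.
+  // CSEMap[SomeTarget::MOVri] == { MI_a, MI_b }, so this is a cheap filter;
+  // LookForDuplicate then asks TII->produceSameValue() whether MI matches
+  // any of those preheader or previously hoisted instructions.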
+  if (CI == CSEMap.end() || MI->isImplicitDef())
+    return false;
+
+  return LookForDuplicate(MI, CI->second) != 0;
+}
+
+/// Hoist - When an instruction is found to use only loop invariant operands
+/// that are safe to hoist, this function is called to do the dirty work.
+///
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+  // First check whether we should hoist this instruction.
+  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+    // If not, try unfolding a hoistable load.
+    MI = ExtractHoistableLoad(MI);
+    if (!MI) return false;
+  }
+
+  // Now move the instruction to the preheader, inserting it before any
+  // terminator instructions.
+  DEBUG({
+      dbgs() << "Hoisting " << *MI;
+      if (Preheader->getBasicBlock())
+        dbgs() << " to MachineBasicBlock "
+               << Preheader->getName();
+      if (MI->getParent()->getBasicBlock())
+        dbgs() << " from MachineBasicBlock "
+               << MI->getParent()->getName();
+      dbgs() << "\n";
+    });
+
+  // If this is the first instruction being hoisted to the preheader,
+  // initialize the CSE map with potential common expressions.
+  if (FirstInLoop) {
+    InitCSEMap(Preheader);
+    FirstInLoop = false;
+  }
+
+  // Look for opportunity to CSE the hoisted instruction.
+  unsigned Opcode = MI->getOpcode();
+  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+    CI = CSEMap.find(Opcode);
+  if (!EliminateCSE(MI, CI)) {
+    // Otherwise, splice the instruction to the preheader.
+    Preheader->splice(Preheader->getFirstTerminator(), MI->getParent(), MI);
+
+    // Update register pressure for BBs from header to this block.
+    UpdateBackTraceRegPressure(MI);
+
+    // Clear the kill flags of any register this instruction defines,
+    // since they may need to be live throughout the entire loop
+    // rather than just live for part of it.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isDef() && !MO.isDead())
+        MRI->clearKillFlags(MO.getReg());
+    }
+
+    // Add to the CSE map.
+    if (CI != CSEMap.end())
+      CI->second.push_back(MI);
+    else {
+      std::vector<const MachineInstr*> CSEMIs;
+      CSEMIs.push_back(MI);
+      CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+    }
+  }
+
+  ++NumHoisted;
+  Changed = true;
+
+  return true;
+}
+
+MachineBasicBlock *MachineLICM::getCurPreheader() {
+  // Determine the block to which to hoist instructions. If we can't find a
+  // suitable loop predecessor, we can't do any hoisting.
+
+  // If we've tried to get a preheader and failed, don't try again.
+  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+    return 0;
+
+  if (!CurPreheader) {
+    CurPreheader = CurLoop->getLoopPreheader();
+    if (!CurPreheader) {
+      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+      if (!Pred) {
+        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+        return 0;
+      }
+
+      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
+      if (!CurPreheader) {
+        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+        return 0;
+      }
+    }
+  }
+  return CurPreheader;
+} diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp new file mode 100644 index 0000000..4e2cfdc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -0,0 +1,81 @@ +//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines the MachineLoopInfo class that is used to identify natural +// loops and determine the loop depth of various nodes of the CFG. Note that +// the loops identified may actually be several natural loops that share the +// same header node... not just a single natural loop. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/Analysis/LoopInfoImpl.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. +template class llvm::LoopBase<MachineBasicBlock, MachineLoop>; +template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>; + +char MachineLoopInfo::ID = 0; +INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true) + +char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; + +bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { + releaseMemory(); + LI.Analyze(getAnalysis<MachineDominatorTree>().getBase()); + return false; +} + +void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineBasicBlock *MachineLoop::getTopBlock() { + MachineBasicBlock *TopMBB = getHeader(); + MachineFunction::iterator Begin = TopMBB->getParent()->begin(); + if (TopMBB != Begin) { + MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB)); + while (contains(PriorMBB)) { + TopMBB = PriorMBB; + if (TopMBB == Begin) break; + PriorMBB = prior(MachineFunction::iterator(TopMBB)); + } + } + return TopMBB; +} + +MachineBasicBlock *MachineLoop::getBottomBlock() { + MachineBasicBlock *BotMBB = getHeader(); + MachineFunction::iterator End = BotMBB->getParent()->end(); + if (BotMBB != prior(End)) { + MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); + while (contains(NextMBB)) { + BotMBB = NextMBB; + if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break; + NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); + } + } + return BotMBB; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineLoop::dump() const { + print(dbgs()); +} +#endif diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp new file mode 100644 index 0000000..0ea9ae0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -0,0 +1,578 @@ +//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; +using namespace llvm::dwarf; + +// Handle the Pass registration stuff necessary to use DataLayout's. +INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", + "Machine Module Information", false, false) +char MachineModuleInfo::ID = 0; + +// Out of line virtual method. +MachineModuleInfoImpl::~MachineModuleInfoImpl() {} + +namespace llvm { +class MMIAddrLabelMapCallbackPtr : CallbackVH { + MMIAddrLabelMap *Map; +public: + MMIAddrLabelMapCallbackPtr() : Map(0) {} + MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {} + + void setPtr(BasicBlock *BB) { + ValueHandleBase::operator=(BB); + } + + void setMap(MMIAddrLabelMap *map) { Map = map; } + + virtual void deleted(); + virtual void allUsesReplacedWith(Value *V2); +}; + +class MMIAddrLabelMap { + MCContext &Context; + struct AddrLabelSymEntry { + /// Symbols - The symbols for the label. This is a pointer union that is + /// either one symbol (the common case) or a list of symbols. + PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols; + + Function *Fn; // The containing function of the BasicBlock. + unsigned Index; // The index in BBCallbacks for the BasicBlock. + }; + + DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols; + + /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We + /// use this so we get notified if a block is deleted or RAUWd. + std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks; + + /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols + /// whose corresponding BasicBlock got deleted. These symbols need to be + /// emitted at some point in the file, so AsmPrinter emits them after the + /// function body. + DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> > + DeletedAddrLabelsNeedingEmission; +public: + + MMIAddrLabelMap(MCContext &context) : Context(context) {} + ~MMIAddrLabelMap() { + assert(DeletedAddrLabelsNeedingEmission.empty() && + "Some labels for deleted blocks never got emitted"); + + // Deallocate any of the 'list of symbols' case. + for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator + I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I) + if (I->second.Symbols.is<std::vector<MCSymbol*>*>()) + delete I->second.Symbols.get<std::vector<MCSymbol*>*>(); + } + + MCSymbol *getAddrLabelSymbol(BasicBlock *BB); + std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB); + + void takeDeletedSymbolsForFunction(Function *F, + std::vector<MCSymbol*> &Result); + + void UpdateForDeletedBlock(BasicBlock *BB); + void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); +}; +} + +MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) { + assert(BB->hasAddressTaken() && + "Shouldn't get label for block without address taken"); + AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; + + // If we already had an entry for this block, just return it. 
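+  // Representation sketch: Symbols is a PointerUnion, so the common
+  // single-label case stores the MCSymbol* inline with no allocation; only
+  // when RAUW merges blocks (see UpdateForRAUWBlock below) is the entry
+  // upgraded to a heap-allocated std::vector<MCSymbol*> holding all of the
+  // block's labels.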
+ if (!Entry.Symbols.isNull()) { + assert(BB->getParent() == Entry.Fn && "Parent changed"); + if (Entry.Symbols.is<MCSymbol*>()) + return Entry.Symbols.get<MCSymbol*>(); + return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0]; + } + + // Otherwise, this is a new entry, create a new symbol for it and add an + // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd. + BBCallbacks.push_back(BB); + BBCallbacks.back().setMap(this); + Entry.Index = BBCallbacks.size()-1; + Entry.Fn = BB->getParent(); + MCSymbol *Result = Context.CreateTempSymbol(); + Entry.Symbols = Result; + return Result; +} + +std::vector<MCSymbol*> +MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { + assert(BB->hasAddressTaken() && + "Shouldn't get label for block without address taken"); + AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; + + std::vector<MCSymbol*> Result; + + // If we already had an entry for this block, just return it. + if (Entry.Symbols.isNull()) + Result.push_back(getAddrLabelSymbol(BB)); + else if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) + Result.push_back(Sym); + else + Result = *Entry.Symbols.get<std::vector<MCSymbol*>*>(); + return Result; +} + + +/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return +/// them. +void MMIAddrLabelMap:: +takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { + DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I = + DeletedAddrLabelsNeedingEmission.find(F); + + // If there are no entries for the function, just return. + if (I == DeletedAddrLabelsNeedingEmission.end()) return; + + // Otherwise, take the list. + std::swap(Result, I->second); + DeletedAddrLabelsNeedingEmission.erase(I); +} + + +void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { + // If the block got deleted, there is no need for the symbol. If the symbol + // was already emitted, we can just forget about it, otherwise we need to + // queue it up for later emission when the function is output. + AddrLabelSymEntry Entry = AddrLabelSymbols[BB]; + AddrLabelSymbols.erase(BB); + assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?"); + BBCallbacks[Entry.Index] = 0; // Clear the callback. + + assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) && + "Block/parent mismatch"); + + // Handle both the single and the multiple symbols cases. + if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) { + if (Sym->isDefined()) + return; + + // If the block is not yet defined, we need to emit it at the end of the + // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list + // for the containing Function. Since the block is being deleted, its + // parent may already be removed, we have to get the function from 'Entry'. + DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); + } else { + std::vector<MCSymbol*> *Syms = Entry.Symbols.get<std::vector<MCSymbol*>*>(); + + for (unsigned i = 0, e = Syms->size(); i != e; ++i) { + MCSymbol *Sym = (*Syms)[i]; + if (Sym->isDefined()) continue; // Ignore already emitted labels. + + // If the block is not yet defined, we need to emit it at the end of the + // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list + // for the containing Function. Since the block is being deleted, its + // parent may already be removed, we have to get the function from + // 'Entry'. 
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); + } + + // The entry is deleted, free the memory associated with the symbol list. + delete Syms; + } +} + +void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { + // Get the entry for the RAUW'd block and remove it from our map. + AddrLabelSymEntry OldEntry = AddrLabelSymbols[Old]; + AddrLabelSymbols.erase(Old); + assert(!OldEntry.Symbols.isNull() && "Didn't have a symbol, why a callback?"); + + AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New]; + + // If New is not address taken, just move our symbol over to it. + if (NewEntry.Symbols.isNull()) { + BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback. + NewEntry = OldEntry; // Set New's entry. + return; + } + + BBCallbacks[OldEntry.Index] = 0; // Update the callback. + + // Otherwise, we need to add the old symbol to the new block's set. If it is + // just a single entry, upgrade it to a symbol list. + if (MCSymbol *PrevSym = NewEntry.Symbols.dyn_cast<MCSymbol*>()) { + std::vector<MCSymbol*> *SymList = new std::vector<MCSymbol*>(); + SymList->push_back(PrevSym); + NewEntry.Symbols = SymList; + } + + std::vector<MCSymbol*> *SymList = + NewEntry.Symbols.get<std::vector<MCSymbol*>*>(); + + // If the old entry was a single symbol, add it. + if (MCSymbol *Sym = OldEntry.Symbols.dyn_cast<MCSymbol*>()) { + SymList->push_back(Sym); + return; + } + + // Otherwise, concatenate the list. + std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>(); + SymList->insert(SymList->end(), Syms->begin(), Syms->end()); + delete Syms; +} + + +void MMIAddrLabelMapCallbackPtr::deleted() { + Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr())); +} + +void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { + Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2)); +} + + +//===----------------------------------------------------------------------===// + +MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, + const MCObjectFileInfo *MOFI) + : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) { + initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); +} + +MachineModuleInfo::MachineModuleInfo() + : ImmutablePass(ID), + Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) { + llvm_unreachable("This MachineModuleInfo constructor should never be called, " + "MMI should always be explicitly constructed by " + "LLVMTargetMachine"); +} + +MachineModuleInfo::~MachineModuleInfo() { +} + +bool MachineModuleInfo::doInitialization(Module &M) { + + ObjFileMMI = 0; + CompactUnwindEncoding = 0; + CurCallSite = 0; + CallsEHReturn = 0; + CallsUnwindInit = 0; + DbgInfoAvailable = UsesVAFloatArgument = false; + // Always emit some info, by default "no personality" info. + Personalities.push_back(NULL); + AddrLabelSymbols = 0; + TheModule = 0; + + return false; +} + +bool MachineModuleInfo::doFinalization(Module &M) { + + Personalities.clear(); + + delete AddrLabelSymbols; + AddrLabelSymbols = 0; + + Context.reset(); + + delete ObjFileMMI; + ObjFileMMI = 0; + + return false; +} + +/// EndFunction - Discard function meta information. +/// +void MachineModuleInfo::EndFunction() { + // Clean up frame info. + FrameMoves.clear(); + + // Clean up exception info. 
+  LandingPads.clear();
+  CallSiteMap.clear();
+  TypeInfos.clear();
+  FilterIds.clear();
+  FilterEnds.clear();
+  CallsEHReturn = 0;
+  CallsUnwindInit = 0;
+  CompactUnwindEncoding = 0;
+  VariableDbgInfo.clear();
+}
+
+/// AnalyzeModule - Scan the module for global debug information.
+///
+void MachineModuleInfo::AnalyzeModule(const Module &M) {
+  // Insert functions in the llvm.used array (but not llvm.compiler.used) into
+  // UsedFunctions.
+  const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+  if (!GV || !GV->hasInitializer()) return;
+
+  // Should be an array of 'i8*'.
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (InitList == 0) return;
+
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+    if (const Function *F =
+          dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
+      UsedFunctions.insert(F);
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+
+/// getAddrLabelSymbol - Return the symbol to be used for the specified basic
+/// block when its address is taken. This cannot be its normal LBB label
+/// because the block may be accessed outside its containing function.
+MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
+  // Lazily create AddrLabelSymbols.
+  if (AddrLabelSymbols == 0)
+    AddrLabelSymbols = new MMIAddrLabelMap(Context);
+  return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB));
+}
+
+/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
+/// basic block when its address is taken. If other blocks were RAUW'd to
+/// this one, we may have to emit them as well; return the whole set.
+std::vector<MCSymbol*> MachineModuleInfo::
+getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+  // Lazily create AddrLabelSymbols.
+  if (AddrLabelSymbols == 0)
+    AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
+}
+
+
+/// takeDeletedSymbolsForFunction - If the specified function has had any
+/// references to address-taken blocks generated, but the block got deleted,
+/// return the symbol now so we can emit it. This prevents emitting a
+/// reference to a symbol that has no definition.
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+                              std::vector<MCSymbol*> &Result) {
+  // If no blocks have had their addresses taken, we're done.
+  if (AddrLabelSymbols == 0) return;
+  return AddrLabelSymbols->
+     takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
+//===- EH -----------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+    (MachineBasicBlock *LandingPad) {
+  unsigned N = LandingPads.size();
+  for (unsigned i = 0; i < N; ++i) {
+    LandingPadInfo &LP = LandingPads[i];
+    if (LP.LandingPadBlock == LandingPad)
+      return LP;
+  }
+
+  LandingPads.push_back(LandingPadInfo(LandingPad));
+  return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+                                  MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.BeginLabels.push_back(BeginLabel);
+  LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+  MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.LandingPadLabel = LandingPadLabel;
+  return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+                                       const Function *Personality) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.Personality = Personality;
+
+  for (unsigned i = 0; i < Personalities.size(); ++i)
+    if (Personalities[i] == Personality)
+      return;
+
+  // If this is the first personality we're adding, go
+  // ahead and add it at the beginning.
+  if (Personalities[0] == NULL)
+    Personalities[0] = Personality;
+  else
+    Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::
+addCatchTypeInfo(MachineBasicBlock *LandingPad,
+                 ArrayRef<const GlobalVariable *> TyInfo) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  for (unsigned N = TyInfo.size(); N; --N)
+    LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::
+addFilterTypeInfo(MachineBasicBlock *LandingPad,
+                  ArrayRef<const GlobalVariable *> TyInfo) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  std::vector<unsigned> IdsInFilter(TyInfo.size());
+  for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+    IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+  LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+  for (unsigned i = 0; i != LandingPads.size(); ) {
+    LandingPadInfo &LandingPad = LandingPads[i];
+    if (LandingPad.LandingPadLabel &&
+        !LandingPad.LandingPadLabel->isDefined() &&
+        (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+      LandingPad.LandingPadLabel = 0;
+
+    // Special case: we *should* emit LPs with null LP MBB. This indicates
+    // the "nounwind" case.
+    if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+      LandingPads.erase(LandingPads.begin() + i);
+      continue;
+    }
+
+    for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+      MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+      MCSymbol *EndLabel = LandingPad.EndLabels[j];
+      if ((BeginLabel->isDefined() ||
+           (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+          (EndLabel->isDefined() ||
+           (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+      LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+      LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+      --j, --e;
+    }
+
+    // Remove landing pads with no try-ranges.
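+    // (All of this pad's try-ranges were either pruned above or never
+    // recorded, so nothing can unwind to it.)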
+    if (LandingPads[i].BeginLabels.empty()) {
+      LandingPads.erase(LandingPads.begin() + i);
+      continue;
+    }
+
+    // If there is no landing pad, ensure that the list of typeids is empty.
+    // If the only typeid is a cleanup, this is the same as having no typeids.
+    if (!LandingPad.LandingPadBlock ||
+        (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+      LandingPad.TypeIds.clear();
+    ++i;
+  }
+}
+
+/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site
+/// indexes.
+void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
+                                              ArrayRef<unsigned> Sites) {
+  LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
+  for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+    if (TypeInfos[i] == TI) return i + 1;
+
+  TypeInfos.push_back(TI);
+  return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+  // If the new filter coincides with the tail of an existing filter, then
+  // re-use the existing filter. Folding filters more than this requires
+  // re-ordering filters and/or their elements - probably not worth it.
+  for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+       E = FilterEnds.end(); I != E; ++I) {
+    unsigned i = *I, j = TyIds.size();
+
+    while (i && j)
+      if (FilterIds[--i] != TyIds[--j])
+        goto try_next;
+
+    if (!j)
+      // The new filter coincides with range [i, end) of the existing filter.
+      return -(1 + i);
+
+try_next:;
+  }
+
+  // Add the new filter.
+  int FilterID = -(1 + FilterIds.size());
+  FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+  FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
+  FilterEnds.push_back(FilterIds.size());
+  FilterIds.push_back(0); // terminator
+  return FilterID;
+}
+
+/// getPersonality - Return the personality function for the current function.
+const Function *MachineModuleInfo::getPersonality() const {
+  // FIXME: Until PR1414 is fixed, we're using one personality function per
+  // function.
+  return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return the unique index for the current personality
+/// function. The NULL/first personality function should always get index zero.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+  const Function* Personality = NULL;
+
+  // Scan landing pads. If there is at least one non-NULL personality, use it.
+  for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
+    if (LandingPads[i].Personality) {
+      Personality = LandingPads[i].Personality;
+      break;
+    }
+
+  for (unsigned i = 0, e = Personalities.size(); i < e; ++i) {
+    if (Personalities[i] == Personality)
+      return i;
+  }
+
+  // This will happen if the current personality function is
+  // at index zero.
+  return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 0000000..a1c7e9f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out-of-line virtual methods.
+void MachineModuleInfoMachO::anchor() {}
+void MachineModuleInfoELF::anchor() {}
+
+static int SortSymbolPair(const void *LHS, const void *RHS) {
+  typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
+  const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
+  const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
+  return LHSS->getName().compare(RHSS->getName());
+}
+
+/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
+/// sorted order.
+MachineModuleInfoImpl::SymbolListTy
+MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
+                                      MachineModuleInfoImpl::StubValueTy>&Map) {
+  MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
+
+  if (!List.empty())
+    qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+  return List;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 0000000..cb204fd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,55 @@
+//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register
+// allocators and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+void MachinePassRegistryListener::anchor() { }
+
+/// setDefault - Set the default constructor by name.
+void MachinePassRegistry::setDefault(StringRef Name) {
+  MachinePassCtor Ctor = 0;
+  for (MachinePassRegistryNode *R = getList(); R; R = R->getNext()) {
+    if (R->getName() == Name) {
+      Ctor = R->getCtor();
+      break;
+    }
+  }
+  assert(Ctor && "Unregistered pass name");
+  setDefault(Ctor);
+}
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+  Node->setNext(List);
+  List = Node;
+  if (Listener) Listener->NotifyAdd(Node->getName(),
+                                    Node->getCtor(),
+                                    Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
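+///
+/// Removal normally happens automatically: clients register passes through
+/// static objects whose constructors call Add() and whose destructors call
+/// Remove(). For example (an illustrative sketch; createMySched is a
+/// hypothetical factory function):
+///   static MachineSchedRegistry
+///   MySchedRegistry("my-sched", "My experimental scheduler.", createMySched);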
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+  for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+    if (*I == Node) {
+      if (Listener) Listener->NotifyRemove(Node->getName());
+      *I = (*I)->getNext();
+      break;
+    }
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
new file mode 100644
index 0000000..c3f6e92
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -0,0 +1,55 @@
+//===- MachinePostDominators.cpp - Machine Post Dominator Calculation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// post dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePostDominators.h"
+
+using namespace llvm;
+
+char MachinePostDominatorTree::ID = 0;
+
+// Declare initializeMachinePostDominatorTreePass.
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+                "MachinePostDominator Tree Construction", true, true)
+
+MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+  initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+  DT = new DominatorTreeBase<MachineBasicBlock>(true); // true indicates
+                                                       // postdominator
+}
+
+FunctionPass *
+MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+  return new MachinePostDominatorTree();
+}
+
+bool
+MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+  DT->recalculate(F);
+  return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+  delete DT;
+}
+
+void
+MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void
+MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
+  DT->print(OS);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 0000000..1af00e8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,360 @@
+//===-- lib/CodeGen/MachineRegisterInfo.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
+  : TRI(&TRI), IsSSA(true), TracksLiveness(true) {
+  VRegInfo.reserve(256);
+  RegAllocHints.reserve(256);
+  UsedRegUnits.resize(TRI.getNumRegUnits());
+  UsedPhysRegMask.resize(TRI.getNumRegs());
+
+  // Create the physreg use/def lists.
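+  // One list head per physical register; operands link themselves into these
+  // lists via addRegOperandToUseList as they are added to instructions.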
+ PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; + memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); +} + +MachineRegisterInfo::~MachineRegisterInfo() { + delete [] PhysRegUseDefLists; +} + +/// setRegClass - Set the register class of the specified virtual register. +/// +void +MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { + assert(RC && RC->isAllocatable() && "Invalid RC for virtual register"); + VRegInfo[Reg].first = RC; +} + +const TargetRegisterClass * +MachineRegisterInfo::constrainRegClass(unsigned Reg, + const TargetRegisterClass *RC, + unsigned MinNumRegs) { + const TargetRegisterClass *OldRC = getRegClass(Reg); + if (OldRC == RC) + return RC; + const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC); + if (!NewRC || NewRC == OldRC) + return NewRC; + if (NewRC->getNumRegs() < MinNumRegs) + return 0; + setRegClass(Reg, NewRC); + return NewRC; +} + +bool +MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterClass *OldRC = getRegClass(Reg); + const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); + + // Stop early if there is no room to grow. + if (NewRC == OldRC) + return false; + + // Accumulate constraints from all uses. + for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; + ++I) { + const TargetRegisterClass *OpRC = + I->getRegClassConstraint(I.getOperandNo(), TII, TRI); + if (unsigned SubIdx = I.getOperand().getSubReg()) { + if (OpRC) + NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx); + else + NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx); + } else if (OpRC) + NewRC = TRI->getCommonSubClass(NewRC, OpRC); + if (!NewRC || NewRC == OldRC) + return false; + } + setRegClass(Reg, NewRC); + return true; +} + +/// createVirtualRegister - Create and return a new virtual register in the +/// function with the specified register class. +/// +unsigned +MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ + assert(RegClass && "Cannot create register without RegClass!"); + assert(RegClass->isAllocatable() && + "Virtual register RegClass must be allocatable."); + + // New virtual register number. + unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); + VRegInfo.grow(Reg); + VRegInfo[Reg].first = RegClass; + RegAllocHints.grow(Reg); + return Reg; +} + +/// clearVirtRegs - Remove all virtual registers (after physreg assignment). +void MachineRegisterInfo::clearVirtRegs() { +#ifndef NDEBUG + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) + assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && + "Vreg use list non-empty still?"); +#endif + VRegInfo.clear(); +} + +/// Add MO to the linked list of operands for its register. +void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { + assert(!MO->isOnRegUseList() && "Already on list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + + // Head points to the first list element. + // Next is NULL on the last list element. + // Prev pointers are circular, so Head->Prev == Last. + + // Head is NULL for an empty list. + if (!Head) { + MO->Contents.Reg.Prev = MO; + MO->Contents.Reg.Next = 0; + HeadRef = MO; + return; + } + assert(MO->getReg() == Head->getReg() && "Different regs on the same list!"); + + // Insert MO between Last and Head in the circular Prev chain. 
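+  // For example (illustrative): appending a use operand MO to the list
+  // A -> B, where A.Prev == B (circular) and B.Next == NULL, yields
+  // A -> B -> MO with A.Prev == MO and MO.Next == NULL.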
+ MachineOperand *Last = Head->Contents.Reg.Prev; + assert(Last && "Inconsistent use list"); + assert(MO->getReg() == Last->getReg() && "Different regs on the same list!"); + Head->Contents.Reg.Prev = MO; + MO->Contents.Reg.Prev = Last; + + // Def operands always precede uses. This allows def_iterator to stop early. + // Insert def operands at the front, and use operands at the back. + if (MO->isDef()) { + // Insert def at the front. + MO->Contents.Reg.Next = Head; + HeadRef = MO; + } else { + // Insert use at the end. + MO->Contents.Reg.Next = 0; + Last->Contents.Reg.Next = MO; + } +} + +/// Remove MO from its use-def list. +void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { + assert(MO->isOnRegUseList() && "Operand not on use list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + assert(Head && "List already empty"); + + // Unlink this from the doubly linked list of operands. + MachineOperand *Next = MO->Contents.Reg.Next; + MachineOperand *Prev = MO->Contents.Reg.Prev; + + // Prev links are circular, next link is NULL instead of looping back to Head. + if (MO == Head) + HeadRef = Next; + else + Prev->Contents.Reg.Next = Next; + + (Next ? Next : Head)->Contents.Reg.Prev = Prev; + + MO->Contents.Reg.Prev = 0; + MO->Contents.Reg.Next = 0; +} + +/// Move NumOps operands from Src to Dst, updating use-def lists as needed. +/// +/// The Dst range is assumed to be uninitialized memory. (Or it may contain +/// operands that won't be destroyed, which is OK because the MO destructor is +/// trivial anyway). +/// +/// The Src and Dst ranges may overlap. +void MachineRegisterInfo::moveOperands(MachineOperand *Dst, + MachineOperand *Src, + unsigned NumOps) { + assert(Src != Dst && NumOps && "Noop moveOperands"); + + // Copy backwards if Dst is within the Src range. + int Stride = 1; + if (Dst >= Src && Dst < Src + NumOps) { + Stride = -1; + Dst += NumOps - 1; + Src += NumOps - 1; + } + + // Copy one operand at a time. + do { + new (Dst) MachineOperand(*Src); + + // Dst takes Src's place in the use-def chain. + if (Src->isReg()) { + MachineOperand *&Head = getRegUseDefListHead(Src->getReg()); + MachineOperand *Prev = Src->Contents.Reg.Prev; + MachineOperand *Next = Src->Contents.Reg.Next; + assert(Head && "List empty, but operand is chained"); + assert(Prev && "Operand was not on use-def list"); + + // Prev links are circular, next link is NULL instead of looping back to + // Head. + if (Src == Head) + Head = Dst; + else + Prev->Contents.Reg.Next = Dst; + + // Update Prev pointer. This also works when Src was pointing to itself + // in a 1-element list. In that case Head == Dst. + (Next ? Next : Head)->Contents.Reg.Prev = Dst; + } + + Dst += Stride; + Src += Stride; + } while (--NumOps); +} + +/// replaceRegWith - Replace all instances of FromReg with ToReg in the +/// machine function. This is like llvm-level X->replaceAllUsesWith(Y), +/// except that it also changes any definitions of the register as well. +void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { + assert(FromReg != ToReg && "Cannot replace a reg with itself"); + + // TODO: This could be more efficient by bulk changing the operands. + for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { + MachineOperand &O = I.getOperand(); + ++I; + O.setReg(ToReg); + } +} + + +/// getVRegDef - Return the machine instr that defines the specified virtual +/// register or null if none is found. 
This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+  // Since we are in SSA form, we can use the first definition.
+  def_iterator I = def_begin(Reg);
+  assert((I.atEnd() || llvm::next(I) == def_end()) &&
+         "getVRegDef assumes a single definition or no definition");
+  return !I.atEnd() ? &*I : 0;
+}
+
+/// getUniqueVRegDef - Return the unique machine instr that defines the
+/// specified virtual register or null if none is found. If there are
+/// multiple definitions or no definition, return null.
+MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
+  if (def_empty(Reg)) return 0;
+  def_iterator I = def_begin(Reg);
+  if (llvm::next(I) != def_end())
+    return 0;
+  return &*I;
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+  use_nodbg_iterator UI = use_nodbg_begin(RegNo);
+  if (UI == use_nodbg_end())
+    return false;
+  return ++UI == use_nodbg_end();
+}
+
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+  for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI)
+    UI.getOperand().setIsKill(false);
+}
+
+bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->first == Reg || I->second == Reg)
+      return true;
+  return false;
+}
+
+/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->second == VReg)
+      return I->first;
+  return 0;
+}
+
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in virtual register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->first == PReg)
+      return I->second;
+  return 0;
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+                                      const TargetRegisterInfo &TRI,
+                                      const TargetInstrInfo &TII) {
+  // Emit the copies into the top of the block.
+  for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+    if (LiveIns[i].second) {
+      if (use_empty(LiveIns[i].second)) {
+        // The livein has no uses. Drop it.
+        //
+        // It would be preferable to have isel avoid creating live-in
+        // records for unused arguments in the first place, but it's
+        // complicated by the debug info code for arguments.
+        LiveIns.erase(LiveIns.begin() + i);
+        --i; --e;
+      } else {
+        // Emit a copy.
+        BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+                TII.get(TargetOpcode::COPY), LiveIns[i].second)
+          .addReg(LiveIns[i].first);
+
+        // Add the register to the entry block live-in set.
+        EntryMBB->addLiveIn(LiveIns[i].first);
+      }
+    } else {
+      // Add the register to the entry block live-in set.
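+      // (This live-in has no associated virtual register, so no COPY is
+      // needed; just record the physical register.)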
+      EntryMBB->addLiveIn(LiveIns[i].first);
+    }
+}
+
+#ifndef NDEBUG
+void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+  for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
+    I.getOperand().getParent()->dump();
+}
+#endif
+
+void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
+  ReservedRegs = TRI->getReservedRegs(MF);
+  assert(ReservedRegs.size() == TRI->getNumRegs() &&
+         "Invalid ReservedRegs vector from target");
+}
+
+bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
+                                            const MachineFunction &MF) const {
+  assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+
+  // Check if any overlapping register is modified, or allocatable so it may be
+  // used later.
+  for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
+    if (!def_empty(*AI) || isAllocatable(*AI))
+      return false;
+  return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 0000000..bb6aad7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,364 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It is based on the
+// SSAUpdater class in lib/Transforms/Utils.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+using namespace llvm;
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+  return *static_cast<AvailableValsTy*>(AV);
+}
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+                                     SmallVectorImpl<MachineInstr*> *NewPHI)
+  : AV(0), InsertedPHIs(NewPHI) {
+  TII = MF.getTarget().getInstrInfo();
+  MRI = &MF.getRegInfo();
+}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+  delete static_cast<AvailableValsTy*>(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates. V is a virtual register whose class is used when naming new
+/// PHI nodes.
+void MachineSSAUpdater::Initialize(unsigned V) {
+  if (AV == 0)
+    AV = new AvailableValsTy();
+  else
+    getAvailableVals(AV).clear();
+
+  VR = V;
+  VRC = MRI->getRegClass(VR);
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a
+/// value for the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+  return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
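+///
+/// A typical client sequence looks like this (an illustrative sketch; OrigVR,
+/// NewVR, DefMBB, and UseMO are hypothetical names):
+///   MachineSSAUpdater SSAUpdate(MF);
+///   SSAUpdate.Initialize(OrigVR);
+///   SSAUpdate.AddAvailableValue(DefMBB, NewVR);
+///   SSAUpdate.RewriteUse(UseMO);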
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+  getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+  return GetValueAtEndOfBlockInternal(BB);
+}
+
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+        SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) {
+  if (BB->empty())
+    return 0;
+
+  MachineBasicBlock::iterator I = BB->begin();
+  if (!I->isPHI())
+    return 0;
+
+  AvailableValsTy AVals;
+  for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+    AVals[PredValues[i].first] = PredValues[i].second;
+  while (I != BB->end() && I->isPHI()) {
+    bool Same = true;
+    for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+      unsigned SrcReg = I->getOperand(i).getReg();
+      MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+      if (AVals[SrcBB] != SrcReg) {
+        Same = false;
+        break;
+      }
+    }
+    if (Same)
+      return I->getOperand(0).getReg();
+    ++I;
+  }
+  return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which
+/// defines a value of the given register class at the start of the specified
+/// basic block. It returns the virtual register defined by the instruction.
+static
+MachineInstrBuilder InsertNewDef(unsigned Opcode,
+                           MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+                           const TargetRegisterClass *RC,
+                           MachineRegisterInfo *MRI,
+                           const TargetInstrInfo *TII) {
+  unsigned NewVR = MRI->createVirtualRegister(RC);
+  return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+///   X1 = ...
+/// SomeBB:
+///   use(X)
+///   X2 = ...
+///   br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+  // If there is no definition of the renamed variable in this block, just use
+  // GetValueAtEndOfBlock to do our work.
+  if (!HasValueForBlock(BB))
+    return GetValueAtEndOfBlockInternal(BB);
+
+  // If there are no predecessors, just return undef.
+  if (BB->pred_empty()) {
+    // Insert an implicit_def to represent an undef value.
+    MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+                                        BB, BB->getFirstTerminator(),
+                                        VRC, MRI, TII);
+    return NewDef->getOperand(0).getReg();
+  }
+
+  // Otherwise, we have the hard case. Get the live-in values for each
+  // predecessor.
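+  // While gathering the predecessor values, also track whether they are all
+  // identical (SingularValue below); if so, no PHI is needed.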
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues; + unsigned SingularValue = 0; + + bool isFirstPred = true; + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PredBB = *PI; + unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue != 0) + return SingularValue; + + // If an identical PHI is already in BB, just reuse it. + unsigned DupPHI = LookForIdenticalPHI(BB, PredValues); + if (DupPHI) + return DupPHI; + + // Otherwise, we do need a PHI: insert one now. + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); + MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, + Loc, VRC, MRI, TII); + + // Fill in all the predecessors of the PHI. + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { + InsertedPHI->eraseFromParent(); + return ConstVal; + } + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI->getOperand(0).getReg(); +} + +static +MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, + MachineOperand *U) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + if (&MI->getOperand(i) == U) + return MI->getOperand(i+1).getMBB(); + } + + llvm_unreachable("MachineOperand::getParent() failure?"); +} + +/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, +/// which use their value in the corresponding predecessor. +void MachineSSAUpdater::RewriteUse(MachineOperand &U) { + MachineInstr *UseMI = U.getParent(); + unsigned NewVR = 0; + if (UseMI->isPHI()) { + MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U); + NewVR = GetValueAtEndOfBlockInternal(SourceBB); + } else { + NewVR = GetValueInMiddleOfBlock(UseMI->getParent()); + } + + U.setReg(NewVR); +} + +void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { + MRI->replaceRegWith(OldReg, NewReg); + + AvailableValsTy &AvailableVals = getAvailableVals(AV); + for (DenseMap<MachineBasicBlock*, unsigned>::iterator + I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I) + if (I->second == OldReg) + I->second = NewReg; +} + +/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl +/// template, specialized for MachineSSAUpdater. +namespace llvm { +template<> +class SSAUpdaterTraits<MachineSSAUpdater> { +public: + typedef MachineBasicBlock BlkT; + typedef unsigned ValT; + typedef MachineInstr PhiT; + + typedef MachineBasicBlock::succ_iterator BlkSucc_iterator; + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); } + + /// Iterator for PHI operands. 
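+  /// Machine PHIs store their incoming values as (Reg, MBB) operand pairs
+  /// starting at operand 1, so the iterator begins at index 1 and steps by 2.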
+ class PHI_iterator { + private: + MachineInstr *PHI; + unsigned idx; + + public: + explicit PHI_iterator(MachineInstr *P) // begin iterator + : PHI(P), idx(1) {} + PHI_iterator(MachineInstr *P, bool) // end iterator + : PHI(P), idx(PHI->getNumOperands()) {} + + PHI_iterator &operator++() { idx += 2; return *this; } + bool operator==(const PHI_iterator& x) const { return idx == x.idx; } + bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } + MachineBasicBlock *getIncomingBlock() { + return PHI->getOperand(idx+1).getMBB(); + } + }; + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static inline PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); + } + + /// FindPredecessorBlocks - Put the predecessors of BB into the Preds + /// vector. + static void FindPredecessorBlocks(MachineBasicBlock *BB, + SmallVectorImpl<MachineBasicBlock*> *Preds){ + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) + Preds->push_back(*PI); + } + + /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register. + /// Add it into the specified block and return the register. + static unsigned GetUndefVal(MachineBasicBlock *BB, + MachineSSAUpdater *Updater) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, + BB, BB->getFirstTerminator(), + Updater->VRC, Updater->MRI, + Updater->TII); + return NewDef->getOperand(0).getReg(); + } + + /// CreateEmptyPHI - Create a PHI instruction that defines a new register. + /// Add it into the specified block and return the register. + static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, + MachineSSAUpdater *Updater) { + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); + MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, + Updater->VRC, Updater->MRI, + Updater->TII); + return PHI->getOperand(0).getReg(); + } + + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(MachineInstr *PHI, unsigned Val, + MachineBasicBlock *Pred) { + MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred); + } + + /// InstrIsPHI - Check if an instruction is a PHI. + /// + static MachineInstr *InstrIsPHI(MachineInstr *I) { + if (I && I->isPHI()) + return I; + return 0; + } + + /// ValueIsPHI - Check if the instruction that defines the specified register + /// is a PHI instruction. + static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) { + return InstrIsPHI(Updater->MRI->getVRegDef(Val)); + } + + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. + static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) { + MachineInstr *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->getNumOperands() <= 1) + return PHI; + return 0; + } + + /// GetPHIValue - For the specified PHI instruction, return the register + /// that it defines. + static unsigned GetPHIValue(MachineInstr *PHI) { + return PHI->getOperand(0).getReg(); + } +}; + +} // End llvm namespace + +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. 
If not, construct SSA form by +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. +unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ + AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (unsigned V = AvailableVals[BB]) + return V; + + SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); + return Impl.GetValue(BB); +} diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp new file mode 100644 index 0000000..5bd2349 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -0,0 +1,2396 @@ +//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "misched" + +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/ScheduleDFS.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include <queue> + +using namespace llvm; + +namespace llvm { +cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); +} + +#ifndef NDEBUG +static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); + +static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, + cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); +#else +static bool ViewMISchedDAGs = false; +#endif // NDEBUG + +// Experimental heuristics +static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, + cl::desc("Enable load clustering."), cl::init(true)); + +// Experimental heuristics +static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, + cl::desc("Enable scheduling for macro fusion."), cl::init(true)); + +static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, + cl::desc("Verify machine instrs before and after machine scheduling")); + +// DAG subtrees must have at least this many nodes. 
+static const unsigned MinSubtreeSize = 8; + +//===----------------------------------------------------------------------===// +// Machine Instruction Scheduling Pass and Registry +//===----------------------------------------------------------------------===// + +MachineSchedContext::MachineSchedContext(): + MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) { + RegClassInfo = new RegisterClassInfo(); +} + +MachineSchedContext::~MachineSchedContext() { + delete RegClassInfo; +} + +namespace { +/// MachineScheduler runs after coalescing and before register allocation. +class MachineScheduler : public MachineSchedContext, + public MachineFunctionPass { +public: + MachineScheduler(); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory() {} + + virtual bool runOnMachineFunction(MachineFunction&); + + virtual void print(raw_ostream &O, const Module* = 0) const; + + static char ID; // Class identification, replacement for typeinfo +}; +} // namespace + +char MachineScheduler::ID = 0; + +char &llvm::MachineSchedulerID = MachineScheduler::ID; + +INITIALIZE_PASS_BEGIN(MachineScheduler, "misched", + "Machine Instruction Scheduler", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(MachineScheduler, "misched", + "Machine Instruction Scheduler", false, false) + +MachineScheduler::MachineScheduler() +: MachineFunctionPass(ID) { + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); +} + +void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetPassConfig>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachinePassRegistry MachineSchedRegistry::Registry; + +/// A dummy default scheduler factory indicates whether the scheduler +/// is overridden on the command line. +static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { + return 0; +} + +/// MachineSchedOpt allows command line selection of the scheduler. +static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, + RegisterPassParser<MachineSchedRegistry> > +MachineSchedOpt("misched", + cl::init(&useDefaultMachineSched), cl::Hidden, + cl::desc("Machine instruction scheduler to use")); + +static MachineSchedRegistry +DefaultSchedRegistry("default", "Use the target's default scheduler choice.", + useDefaultMachineSched); + +/// Forward declare the standard machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); + + +/// Decrement this iterator until reaching the top or a non-debug instr. +static MachineBasicBlock::iterator +priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { + assert(I != Beg && "reached the top of the region, cannot decrement"); + while (--I != Beg) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// If this iterator is a debug value, increment until reaching the End or a +/// non-debug instruction. 
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
+  for (; I != End; ++I) {
+    if (!I->isDebugValue())
+      break;
+  }
+  return I;
+}
+
+/// Top-level MachineScheduler pass driver.
+///
+/// Visit blocks in function order. Divide each block into scheduling regions
+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
+/// consistent with the DAG builder, which traverses the interior of the
+/// scheduling regions bottom-up.
+///
+/// This design avoids exposing scheduling boundaries to the DAG builder,
+/// simplifying the DAG builder's support for "special" target instructions.
+/// At the same time the design allows target schedulers to operate across
+/// scheduling boundaries, for example to bundle the boundary instructions
+/// without reordering them. This creates complexity, because the target
+/// scheduler must update the RegionBegin and RegionEnd positions cached by
+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
+/// design would be to split blocks at scheduling boundaries, but LLVM has a
+/// general bias against block splitting purely for implementation simplicity.
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
+
+  // Initialize the context of the pass.
+  MF = &mf;
+  MLI = &getAnalysis<MachineLoopInfo>();
+  MDT = &getAnalysis<MachineDominatorTree>();
+  PassConfig = &getAnalysis<TargetPassConfig>();
+  AA = &getAnalysis<AliasAnalysis>();
+
+  LIS = &getAnalysis<LiveIntervals>();
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+  if (VerifyScheduling) {
+    DEBUG(LIS->print(dbgs()));
+    MF->verify(this, "Before machine scheduling.");
+  }
+  RegClassInfo->runOnMachineFunction(*MF);
+
+  // Select the scheduler, or set the default.
+  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+  if (Ctor == useDefaultMachineSched) {
+    // Get the default scheduler set by the target.
+    Ctor = MachineSchedRegistry::getDefault();
+    if (!Ctor) {
+      Ctor = createConvergingSched;
+      MachineSchedRegistry::setDefault(Ctor);
+    }
+  }
+  // Instantiate the selected scheduler.
+  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+
+  // Visit all machine basic blocks.
+  //
+  // TODO: Visit blocks in global postorder or postorder within the bottom-up
+  // loop tree. Then we can optionally compute global RegPressure.
+  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+       MBB != MBBEnd; ++MBB) {
+
+    Scheduler->startBlock(MBB);
+
+    // Break the block into scheduling regions [I, RegionEnd), and schedule each
+    // region as soon as it is discovered. RegionEnd points to the scheduling
+    // boundary at the bottom of the region. The DAG does not include RegionEnd,
+    // but the region does (i.e. the next RegionEnd is above the previous
+    // RegionBegin). If the current block has no terminator then RegionEnd ==
+    // MBB->end() for the bottom region.
+    //
+    // The Scheduler may insert instructions during either schedule() or
+    // exitRegion(), even for empty regions. So the local iterators 'I' and
+    // 'RegionEnd' are invalid across these calls.
+    unsigned RemainingInstrs = MBB->size();
+    for (MachineBasicBlock::iterator RegionEnd = MBB->end();
+         RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+
+      // Avoid decrementing RegionEnd for blocks with no terminator.
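+      // We decrement only when RegionEnd is an interior scheduling boundary,
+      // or when the block's last instruction is itself a boundary; a block
+      // with no terminator leaves RegionEnd at MBB->end().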
+      if (RegionEnd != MBB->end()
+          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+        --RegionEnd;
+        // Count the boundary instruction.
+        --RemainingInstrs;
+      }
+
+      // The next region starts above the previous region. Look backward in the
+      // instruction stream until we find the nearest boundary.
+      MachineBasicBlock::iterator I = RegionEnd;
+      for (; I != MBB->begin(); --I, --RemainingInstrs) {
+        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+          break;
+      }
+      // Notify the scheduler of the region, even if we may skip scheduling
+      // it. Perhaps it still needs to be bundled.
+      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
+
+      // Skip empty scheduling regions (0 or 1 schedulable instructions).
+      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
+        // Close the current region. Bundle the terminator if needed.
+        // This invalidates 'RegionEnd' and 'I'.
+        Scheduler->exitRegion();
+        continue;
+      }
+      DEBUG(dbgs() << "********** MI Scheduling **********\n");
+      DEBUG(dbgs() << MF->getName()
+            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+            << "\n From: " << *I << " To: ";
+            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+            else dbgs() << "End";
+            dbgs() << " Remaining: " << RemainingInstrs << "\n");
+
+      // Schedule a region: possibly reorder instructions.
+      // This invalidates 'RegionEnd' and 'I'.
+      Scheduler->schedule();
+
+      // Close the current region.
+      Scheduler->exitRegion();
+
+      // Scheduling has invalidated the current iterator 'I'. Ask the
+      // scheduler for the top of its scheduled region.
+      RegionEnd = Scheduler->begin();
+    }
+    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
+    Scheduler->finishBlock();
+  }
+  Scheduler->finalizeSchedule();
+  DEBUG(LIS->print(dbgs()));
+  if (VerifyScheduling)
+    MF->verify(this, "After machine scheduling.");
+  return true;
+}
+
+void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+  // unimplemented
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ReadyQueue::dump() {
+  dbgs() << " " << Name << ": ";
+  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+    dbgs() << Queue[i]->NodeNum << " ";
+  dbgs() << "\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
+//===----------------------------------------------------------------------===//
+
+ScheduleDAGMI::~ScheduleDAGMI() {
+  delete DFSResult;
+  DeleteContainerPointers(Mutations);
+  delete SchedImpl;
+}
+
+bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+  if (SuccSU != &ExitSU) {
+    // Do not use WillCreateCycle, it assumes SD scheduling.
+    // If Pred is reachable from Succ, then the edge creates a cycle.
+    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+      return false;
+    Topo.AddPred(SuccSU, PredDep.getSUnit());
+  }
+  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+  // Return true regardless of whether a new edge needed to be inserted.
+  return true;
+}
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
+/// NumPredsLeft reaches zero, release the successor node.
+///
+/// FIXME: Adjust SuccSU height based on MinLatency.
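+///
+/// Weak edges are not real dependencies: they only decrement WeakPredsLeft
+/// and never make the successor ready, although a cluster edge records the
+/// successor as the preferred next node to schedule.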
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+  SUnit *SuccSU = SuccEdge->getSUnit();
+
+  if (SuccEdge->isWeak()) {
+    --SuccSU->WeakPredsLeft;
+    if (SuccEdge->isCluster())
+      NextClusterSucc = SuccSU;
+    return;
+  }
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    SuccSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --SuccSU->NumPredsLeft;
+  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+    SchedImpl->releaseTopNode(SuccSU);
+}
+
+/// releaseSuccessors - Call releaseSucc on each of SU's successors.
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    releaseSucc(SU, &*I);
+  }
+}
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+///
+/// FIXME: Adjust PredSU height based on MinLatency.
+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+
+  if (PredEdge->isWeak()) {
+    --PredSU->WeakSuccsLeft;
+    if (PredEdge->isCluster())
+      NextClusterPred = PredSU;
+    return;
+  }
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    PredSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --PredSU->NumSuccsLeft;
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+    SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    releasePred(SU, &*I);
+  }
+}
+
+void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
+                                    MachineBasicBlock::iterator InsertPos) {
+  // Advance RegionBegin if the first instruction moves down.
+  if (&*RegionBegin == MI)
+    ++RegionBegin;
+
+  // Update the instruction stream.
+  BB->splice(InsertPos, BB, MI);
+
+  // Update LiveIntervals
+  LIS->handleMove(MI, /*UpdateFlags=*/true);
+
+  // Recede RegionBegin if an instruction moves above the first.
+  if (RegionBegin == InsertPos)
+    RegionBegin = MI;
+}
+
+bool ScheduleDAGMI::checkSchedLimit() {
+#ifndef NDEBUG
+  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+    CurrentTop = CurrentBottom;
+    return false;
+  }
+  ++NumInstrsScheduled;
+#endif
+  return true;
+}
+
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+                                MachineBasicBlock::iterator begin,
+                                MachineBasicBlock::iterator end,
+                                unsigned endcount)
+{
+  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+
+  // For convenience remember the end of the liveness region.
+  LiveRegionEnd =
+    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
+}
+
+// Set up the register pressure trackers for the top-scheduled and
+// bottom-scheduled regions.
+void ScheduleDAGMI::initRegPressure() {
+  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
+  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+  // Close the RPTracker to finalize live ins.
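+  // (RPTracker was primed by buildDAGWithRegPressure, which receded it from
+  // the region bottom to the top while the DAG was built.)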
+  RPTracker.closeRegion();
+
+  DEBUG(RPTracker.getPressure().dump(TRI));
+
+  // Initialize the live ins and live outs.
+  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+  // Close one end of the tracker so we can call
+  // getMaxUpward/DownwardPressureDelta before advancing across any
+  // instructions. This converts currently live regs into live ins/outs.
+  TopRPTracker.closeTop();
+  BotRPTracker.closeBottom();
+
+  // Account for liveness generated by the region boundary.
+  if (LiveRegionEnd != RegionEnd)
+    BotRPTracker.recede();
+
+  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+
+  // Cache the list of excess pressure sets in this region. This will also track
+  // the max pressure in the scheduled code for these sets.
+  RegionCriticalPSets.clear();
+  const std::vector<unsigned> &RegionPressure =
+    RPTracker.getPressure().MaxSetPressure;
+  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
+    unsigned Limit = TRI->getRegPressureSetLimit(i);
+    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+          << " Limit " << Limit
+          << " Actual " << RegionPressure[i] << "\n");
+    if (RegionPressure[i] > Limit)
+      RegionCriticalPSets.push_back(PressureElement(i, 0));
+  }
+  DEBUG(dbgs() << "Excess PSets: ";
+        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+          dbgs() << TRI->getRegPressureSetName(
+            RegionCriticalPSets[i].PSetID) << " ";
+        dbgs() << "\n");
+}
+
+// FIXME: When the pressure tracker deals in pressure differences then we won't
+// iterate over all RegionCriticalPSets[i].
+void ScheduleDAGMI::
+updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
+  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
+    unsigned ID = RegionCriticalPSets[i].PSetID;
+    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
+    if ((int)NewMaxPressure[ID] > MaxUnits)
+      MaxUnits = NewMaxPressure[ID];
+  }
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+///
+/// This is a skeletal driver, with all the functionality pushed into helpers,
+/// so that it can be easily extended by experimental schedulers. Generally,
+/// implementing MachineSchedStrategy should be sufficient to implement a new
+/// scheduling algorithm. However, if a scheduler further subclasses
+/// ScheduleDAGMI then it will want to override this virtual method in order to
+/// update any specialized state.
+void ScheduleDAGMI::schedule() {
+  buildDAGWithRegPressure();
+
+  Topo.InitDAGTopologicalSorting();
+
+  postprocessDAG();
+
+  SmallVector<SUnit*, 8> TopRoots, BotRoots;
+  findRootsAndBiasEdges(TopRoots, BotRoots);
+
+  // Initialize the strategy before modifying the DAG.
+  // This may initialize a DFSResult to be used for queue priority.
+  SchedImpl->initialize(this);
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+  if (ViewMISchedDAGs) viewGraph();
+
+  // Initialize ready queues now that the DAG and priority data are finalized.
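+  // (initQueues releases the DAG roots into the ready queues and advances
+  // CurrentTop past any leading DBG_VALUEs.)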
+  initQueues(TopRoots, BotRoots);
+
+  bool IsTopNode = false;
+  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+    assert(!SU->isScheduled && "Node already scheduled");
+    if (!checkSchedLimit())
+      break;
+
+    scheduleMI(SU, IsTopNode);
+
+    updateQueues(SU, IsTopNode);
+  }
+  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+  placeDebugValues();
+
+  DEBUG({
+      unsigned BBNum = begin()->getParent()->getNumber();
+      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+      dumpSchedule();
+      dbgs() << '\n';
+    });
+}
+
+/// Build the DAG and set up three register pressure trackers.
+void ScheduleDAGMI::buildDAGWithRegPressure() {
+  // Initialize the register pressure tracker used by buildSchedGraph.
+  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+  // Account for liveness generated by the region boundary.
+  if (LiveRegionEnd != RegionEnd)
+    RPTracker.recede();
+
+  // Build the DAG, and compute current register pressure.
+  buildSchedGraph(AA, &RPTracker);
+
+  // Initialize top/bottom trackers after computing region pressure.
+  initRegPressure();
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void ScheduleDAGMI::postprocessDAG() {
+  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
+    Mutations[i]->apply(this);
+  }
+}
+
+void ScheduleDAGMI::computeDFSResult() {
+  if (!DFSResult)
+    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
+  DFSResult->clear();
+  ScheduledTrees.clear();
+  DFSResult->resize(SUnits.size());
+  DFSResult->compute(SUnits);
+  ScheduledTrees.resize(DFSResult->getNumSubtrees());
+}
+
+void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+                                          SmallVectorImpl<SUnit*> &BotRoots) {
+  for (std::vector<SUnit>::iterator
+         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+    SUnit *SU = &(*I);
+    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+
+    // Order predecessors so DFSResult follows the critical path.
+    SU->biasCriticalPath();
+
+    // A SUnit is ready to top schedule if it has no predecessors.
+    if (!I->NumPredsLeft)
+      TopRoots.push_back(SU);
+    // A SUnit is ready to bottom schedule if it has no successors.
+    if (!I->NumSuccsLeft)
+      BotRoots.push_back(SU);
+  }
+  ExitSU.biasCriticalPath();
+}
+
+/// Identify DAG roots and set up scheduler queues.
+void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
+                               ArrayRef<SUnit*> BotRoots) {
+  NextClusterSucc = NULL;
+  NextClusterPred = NULL;
+
+  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
+  //
+  // Nodes with unreleased weak edges can still be roots.
+  // Release top roots in forward order.
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
+    SchedImpl->releaseTopNode(*I);
+  }
+  // Release bottom roots in reverse order so the higher priority nodes appear
+  // first. This is more natural and slightly more efficient.
+  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
+    SchedImpl->releaseBottomNode(*I);
+  }
+
+  releaseSuccessors(&EntrySU);
+  releasePredecessors(&ExitSU);
+
+  SchedImpl->registerRoots();
+
+  // Advance past initial DebugValues.
+  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+  TopRPTracker.setPos(CurrentTop);
+
+  CurrentBottom = RegionEnd;
+}
+
+/// Move an instruction and update register pressure.
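+///
+/// For example, in the top-down case, if the picked MI is already at
+/// CurrentTop the boundary simply advances past it; otherwise the MI is
+/// spliced up to CurrentTop and the top pressure tracker advances across it.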
+void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { + // Move the instruction to its new location in the instruction stream. + MachineInstr *MI = SU->getInstr(); + + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); + else { + moveInstruction(MI, CurrentTop); + TopRPTracker.setPos(MI); + } + + // Update top scheduled pressure. + TopRPTracker.advance(); + assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); + } + else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + MachineBasicBlock::iterator priorII = + priorNonDebug(CurrentBottom, CurrentTop); + if (&*priorII == MI) + CurrentBottom = priorII; + else { + if (&*CurrentTop == MI) { + CurrentTop = nextIfDebug(++CurrentTop, priorII); + TopRPTracker.setPos(CurrentTop); + } + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + // Update bottom scheduled pressure. + BotRPTracker.recede(); + assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); + } +} + +/// Update scheduler queues after scheduling an instruction. +void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { + // Release dependent instructions for scheduling. + if (IsTopNode) + releaseSuccessors(SU); + else + releasePredecessors(SU); + + SU->isScheduled = true; + + if (DFSResult) { + unsigned SubtreeID = DFSResult->getSubtreeID(SU); + if (!ScheduledTrees.test(SubtreeID)) { + ScheduledTrees.set(SubtreeID); + DFSResult->scheduleTree(SubtreeID); + SchedImpl->scheduleTree(SubtreeID); + } + } + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); +} + +/// Reinsert any remaining debug_values, just like the PostRA scheduler. +void ScheduleDAGMI::placeDebugValues() { + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) { + BB->splice(RegionBegin, BB, FirstDbgValue); + RegionBegin = FirstDbgValue; + } + + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrevMI = P.second; + if (&*RegionBegin == DbgValue) + ++RegionBegin; + BB->splice(++OrigPrevMI, BB, DbgValue); + if (OrigPrevMI == llvm::prior(RegionEnd)) + RegionEnd = DbgValue; + } + DbgValues.clear(); + FirstDbgValue = NULL; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGMI::dumpSchedule() const { + for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { + if (SUnit *SU = getSUnit(&(*MI))) + SU->dump(this); + else + dbgs() << "Missing SUnit\n"; + } +} +#endif + +//===----------------------------------------------------------------------===// +// LoadClusterMutation - DAG post-processing to cluster loads. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Post-process the DAG to create cluster edges between neighboring +/// loads. 
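+///
+/// For example, on a target whose getLdStBaseRegImmOfs recognizes
+/// base-plus-offset loads (a hypothetical pattern, not tied to any one
+/// target):
+/// \code
+///   ldr w0, [x1, #8]
+///   ldr w2, [x1, #16]   // weak Cluster edge keeps these adjacent
+/// \endcode
+/// so that a later peephole pass can combine them into a paired load.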
+class LoadClusterMutation : public ScheduleDAGMutation { + struct LoadInfo { + SUnit *SU; + unsigned BaseReg; + unsigned Offset; + LoadInfo(SUnit *su, unsigned reg, unsigned ofs) + : SU(su), BaseReg(reg), Offset(ofs) {} + }; + static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS, + const LoadClusterMutation::LoadInfo &RHS); + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; +public: + LoadClusterMutation(const TargetInstrInfo *tii, + const TargetRegisterInfo *tri) + : TII(tii), TRI(tri) {} + + virtual void apply(ScheduleDAGMI *DAG); +protected: + void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG); +}; +} // anonymous + +bool LoadClusterMutation::LoadInfoLess( + const LoadClusterMutation::LoadInfo &LHS, + const LoadClusterMutation::LoadInfo &RHS) { + if (LHS.BaseReg != RHS.BaseReg) + return LHS.BaseReg < RHS.BaseReg; + return LHS.Offset < RHS.Offset; +} + +void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads, + ScheduleDAGMI *DAG) { + SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords; + for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) { + SUnit *SU = Loads[Idx]; + unsigned BaseReg; + unsigned Offset; + if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) + LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset)); + } + if (LoadRecords.size() < 2) + return; + std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess); + unsigned ClusterLength = 1; + for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) { + if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) { + ClusterLength = 1; + continue; + } + + SUnit *SUa = LoadRecords[Idx].SU; + SUnit *SUb = LoadRecords[Idx+1].SU; + if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength) + && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { + + DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU(" + << SUb->NodeNum << ")\n"); + // Copy successor edges from SUa to SUb. Interleaving computation + // dependent on SUa can prevent load combining due to register reuse. + // Predecessor edges do not need to be copied from SUb to SUa since nearby + // loads should have effectively the same inputs. + for (SUnit::const_succ_iterator + SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { + if (SI->getSUnit() == SUb) + continue; + DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); + DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); + } + ++ClusterLength; + } + else + ClusterLength = 1; + } +} + +/// \brief Callback from DAG postProcessing to create cluster edges for loads. +void LoadClusterMutation::apply(ScheduleDAGMI *DAG) { + // Map DAG NodeNum to store chain ID. + DenseMap<unsigned, unsigned> StoreChainIDs; + // Map each store chain to a set of dependent loads. + SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; + for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { + SUnit *SU = &DAG->SUnits[Idx]; + if (!SU->getInstr()->mayLoad()) + continue; + unsigned ChainPredID = DAG->SUnits.size(); + for (SUnit::const_pred_iterator + PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { + if (PI->isCtrl()) { + ChainPredID = PI->getSUnit()->NodeNum; + break; + } + } + // Check if this chain-like pred has been seen + // before. ChainPredID==MaxNodeID for loads at the top of the schedule. 
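+    // Loads with different chain predecessors (e.g. separated by a store
+    // barrier) land in different chains here and are never clustered across
+    // that barrier.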
+ unsigned NumChains = StoreChainDependents.size(); + std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result = + StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); + if (Result.second) + StoreChainDependents.resize(NumChains + 1); + StoreChainDependents[Result.first->second].push_back(SU); + } + // Iterate over the store chains. + for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) + clusterNeighboringLoads(StoreChainDependents[Idx], DAG); +} + +//===----------------------------------------------------------------------===// +// MacroFusion - DAG post-processing to encourage fusion of macro ops. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Post-process the DAG to create cluster edges between instructions +/// that may be fused by the processor into a single operation. +class MacroFusion : public ScheduleDAGMutation { + const TargetInstrInfo *TII; +public: + MacroFusion(const TargetInstrInfo *tii): TII(tii) {} + + virtual void apply(ScheduleDAGMI *DAG); +}; +} // anonymous + +/// \brief Callback from DAG postProcessing to create cluster edges to encourage +/// fused operations. +void MacroFusion::apply(ScheduleDAGMI *DAG) { + // For now, assume targets can only fuse with the branch. + MachineInstr *Branch = DAG->ExitSU.getInstr(); + if (!Branch) + return; + + for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { + SUnit *SU = &DAG->SUnits[--Idx]; + if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) + continue; + + // Create a single weak edge from SU to ExitSU. The only effect is to cause + // bottom-up scheduling to heavily prioritize the clustered SU. There is no + // need to copy predecessor edges from ExitSU to SU, since top-down + // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling + // of SU, we could create an artificial edge from the deepest root, but it + // hasn't been needed yet. + bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); + (void)Success; + assert(Success && "No DAG nodes should be reachable from ExitSU"); + + DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); + break; + } +} + +//===----------------------------------------------------------------------===// +// ConvergingScheduler - Implementation of the standard MachineSchedStrategy. +//===----------------------------------------------------------------------===// + +namespace { +/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. +class ConvergingScheduler : public MachineSchedStrategy { +public: + /// Represent the type of SchedCandidate found within a single queue. + /// pickNodeBidirectional depends on these listed by decreasing priority. + enum CandReason { + NoCand, SingleExcess, SingleCritical, Cluster, + ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, + TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, + NodeOrder}; + +#ifndef NDEBUG + static const char *getReasonStr(ConvergingScheduler::CandReason Reason); +#endif + + /// Policy for scheduling the next instruction in the candidate's zone. + struct CandPolicy { + bool ReduceLatency; + unsigned ReduceResIdx; + unsigned DemandResIdx; + + CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} + }; + + /// Status of an instruction's critical resource consumption. + struct SchedResourceDelta { + // Count critical resources in the scheduled region required by SU. 
+ unsigned CritResources; + + // Count critical resources from another region consumed by SU. + unsigned DemandedResources; + + SchedResourceDelta(): CritResources(0), DemandedResources(0) {} + + bool operator==(const SchedResourceDelta &RHS) const { + return CritResources == RHS.CritResources + && DemandedResources == RHS.DemandedResources; + } + bool operator!=(const SchedResourceDelta &RHS) const { + return !operator==(RHS); + } + }; + + /// Store the state used by ConvergingScheduler heuristics, required for the + /// lifetime of one invocation of pickNode(). + struct SchedCandidate { + CandPolicy Policy; + + // The best SUnit candidate. + SUnit *SU; + + // The reason for this candidate. + CandReason Reason; + + // Register pressure values for the best candidate. + RegPressureDelta RPDelta; + + // Critical resource consumption of the best candidate. + SchedResourceDelta ResDelta; + + SchedCandidate(const CandPolicy &policy) + : Policy(policy), SU(NULL), Reason(NoCand) {} + + bool isValid() const { return SU; } + + // Copy the status of another candidate without changing policy. + void setBest(SchedCandidate &Best) { + assert(Best.Reason != NoCand && "uninitialized Sched candidate"); + SU = Best.SU; + Reason = Best.Reason; + RPDelta = Best.RPDelta; + ResDelta = Best.ResDelta; + } + + void initResourceDelta(const ScheduleDAGMI *DAG, + const TargetSchedModel *SchedModel); + }; + + /// Summarize the unscheduled region. + struct SchedRemainder { + // Critical path through the DAG in expected latency. + unsigned CriticalPath; + + // Unscheduled resources + SmallVector<unsigned, 16> RemainingCounts; + // Critical resource for the unscheduled zone. + unsigned CritResIdx; + // Number of micro-ops left to schedule. + unsigned RemainingMicroOps; + + void reset() { + CriticalPath = 0; + RemainingCounts.clear(); + CritResIdx = 0; + RemainingMicroOps = 0; + } + + SchedRemainder() { reset(); } + + void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); + + unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { + if (!SchedModel->hasInstrSchedModel()) + return 0; + + return std::max( + RemainingMicroOps * SchedModel->getMicroOpFactor(), + RemainingCounts[CritResIdx]); + } + }; + + /// Each Scheduling boundary is associated with ready queues. It tracks the + /// current cycle in the direction of movement, and maintains the state + /// of "hazards" and other interlocks at the current cycle. + struct SchedBoundary { + ScheduleDAGMI *DAG; + const TargetSchedModel *SchedModel; + SchedRemainder *Rem; + + ReadyQueue Available; + ReadyQueue Pending; + bool CheckPending; + + // For heuristics, keep a list of the nodes that immediately depend on the + // most recently scheduled node. + SmallPtrSet<const SUnit*, 8> NextSUs; + + ScheduleHazardRecognizer *HazardRec; + + unsigned CurrCycle; + unsigned IssueCount; + + /// MinReadyCycle - Cycle of the soonest available instruction. + unsigned MinReadyCycle; + + // The expected latency of the critical path in this scheduled zone. + unsigned ExpectedLatency; + + // Resources used in the scheduled zone beyond this boundary. + SmallVector<unsigned, 16> ResourceCounts; + + // Cache the critical resources ID in this scheduled zone. + unsigned CritResIdx; + + // Is the scheduled region resource limited vs. latency limited. + bool IsResourceLimited; + + unsigned ExpectedCount; + +#ifndef NDEBUG + // Remember the greatest min operand latency. 
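+    // (pickOnlyChoice() uses this to bound how many idle cycles may be
+    // bumped while waiting for a ready instruction before asserting a
+    // permanent hazard.)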
+ unsigned MaxMinLatency; +#endif + + void reset() { + // A new HazardRec is created for each DAG and owned by SchedBoundary. + delete HazardRec; + + Available.clear(); + Pending.clear(); + CheckPending = false; + NextSUs.clear(); + HazardRec = 0; + CurrCycle = 0; + IssueCount = 0; + MinReadyCycle = UINT_MAX; + ExpectedLatency = 0; + ResourceCounts.resize(1); + assert(!ResourceCounts[0] && "nonzero count for bad resource"); + CritResIdx = 0; + IsResourceLimited = false; + ExpectedCount = 0; +#ifndef NDEBUG + MaxMinLatency = 0; +#endif + // Reserve a zero-count for invalid CritResIdx. + ResourceCounts.resize(1); + } + + /// Pending queues extend the ready queues with the same ID and the + /// PendingFlag set. + SchedBoundary(unsigned ID, const Twine &Name): + DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), + Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), + HazardRec(0) { + reset(); + } + + ~SchedBoundary() { delete HazardRec; } + + void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, + SchedRemainder *rem); + + bool isTop() const { + return Available.getID() == ConvergingScheduler::TopQID; + } + + unsigned getUnscheduledLatency(SUnit *SU) const { + if (isTop()) + return SU->getHeight(); + return SU->getDepth() + SU->Latency; + } + + unsigned getCriticalCount() const { + return ResourceCounts[CritResIdx]; + } + + bool checkHazard(SUnit *SU); + + void setLatencyPolicy(CandPolicy &Policy); + + void releaseNode(SUnit *SU, unsigned ReadyCycle); + + void bumpCycle(); + + void countResource(unsigned PIdx, unsigned Cycles); + + void bumpNode(SUnit *SU); + + void releasePending(); + + void removeReady(SUnit *SU); + + SUnit *pickOnlyChoice(); + }; + +private: + ScheduleDAGMI *DAG; + const TargetSchedModel *SchedModel; + const TargetRegisterInfo *TRI; + + // State of the top and bottom scheduled instruction boundaries. 
+ SchedRemainder Rem; + SchedBoundary Top; + SchedBoundary Bot; + +public: + /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) + enum { + TopQID = 1, + BotQID = 2, + LogMaxQID = 2 + }; + + ConvergingScheduler(): + DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + + virtual void initialize(ScheduleDAGMI *dag); + + virtual SUnit *pickNode(bool &IsTopNode); + + virtual void schedNode(SUnit *SU, bool IsTopNode); + + virtual void releaseTopNode(SUnit *SU); + + virtual void releaseBottomNode(SUnit *SU); + + virtual void registerRoots(); + +protected: + void balanceZones( + ConvergingScheduler::SchedBoundary &CriticalZone, + ConvergingScheduler::SchedCandidate &CriticalCand, + ConvergingScheduler::SchedBoundary &OppositeZone, + ConvergingScheduler::SchedCandidate &OppositeCand); + + void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, + ConvergingScheduler::SchedCandidate &BotCand); + + void tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker); + + SUnit *pickNodeBidirectional(bool &IsTopNode); + + void pickNodeFromQueue(SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); + +#ifndef NDEBUG + void traceCandidate(const SchedCandidate &Cand); +#endif +}; +} // namespace + +void ConvergingScheduler::SchedRemainder:: +init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { + reset(); + if (!SchedModel->hasInstrSchedModel()) + return; + RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); + for (std::vector<SUnit>::iterator + I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { + const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); + RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + unsigned Factor = SchedModel->getResourceFactor(PIdx); + RemainingCounts[PIdx] += (Factor * PI->Cycles); + } + } + for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); + PIdx != PEnd; ++PIdx) { + if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) + >= (int)SchedModel->getLatencyFactor()) { + CritResIdx = PIdx; + } + } +} + +void ConvergingScheduler::SchedBoundary:: +init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { + reset(); + DAG = dag; + SchedModel = smodel; + Rem = rem; + if (SchedModel->hasInstrSchedModel()) + ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); +} + +void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { + DAG = dag; + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; + + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + Bot.init(DAG, SchedModel, &Rem); + + // Initialize resource counts. + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. 
+  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+  const TargetMachine &TM = DAG->MF.getTarget();
+  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+  assert((!ForceTopDown || !ForceBottomUp) &&
+         "-misched-topdown incompatible with -misched-bottomup");
+}
+
+void ConvergingScheduler::releaseTopNode(SUnit *SU) {
+  if (SU->isScheduled)
+    return;
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+    unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+      SU->TopReadyCycle = PredReadyCycle + MinLatency;
+  }
+  Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
+  if (SU->isScheduled)
+    return;
+
+  assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isWeak())
+      continue;
+    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+    unsigned MinLatency = I->getMinLatency();
+#ifndef NDEBUG
+    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
+  }
+  Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+void ConvergingScheduler::registerRoots() {
+  Rem.CriticalPath = DAG->ExitSU.getDepth();
+  // Some roots may not feed into ExitSU. Check all of them in case.
+  for (std::vector<SUnit*>::const_iterator
+         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
+    if ((*I)->getDepth() > Rem.CriticalPath)
+      Rem.CriticalPath = (*I)->getDepth();
+  }
+  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+}
+
+/// Does this SU have a hazard within the current instruction group?
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops
+/// that can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+  if (HazardRec->isEnabled())
+    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
+    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
+          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
+    return true;
+  }
+  return false;
+}
+
+/// Compute the remaining latency to determine whether ILP should be increased.
+void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
+  // FIXME: compile time. In all, we visit four queues here; we should only
+  // need to visit the one that was last popped if we cache the result.
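+  // Roughly: with CriticalPath = 20, an ILP window of 10, and
+  // ExpectedLatency = 15 in this zone, any queued node whose unscheduled
+  // latency reaches 15 (and exceeds the remaining resource count) flips
+  // Policy.ReduceLatency.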
+ unsigned RemLatency = 0; + for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); + I != E; ++I) { + unsigned L = getUnscheduledLatency(*I); + if (L > RemLatency) + RemLatency = L; + } + for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end(); + I != E; ++I) { + unsigned L = getUnscheduledLatency(*I); + if (L > RemLatency) + RemLatency = L; + } + unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); + if (RemLatency + ExpectedLatency >= CriticalPathLimit + && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { + Policy.ReduceLatency = true; + DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); + } +} + +void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + if (ReadyCycle > CurrCycle || checkHazard(SU)) + Pending.push(SU); + else + Available.push(SU); + + // Record this node as an immediate dependent of the scheduled node. + NextSUs.insert(SU); +} + +/// Move the boundary of scheduled code by one cycle. +void ConvergingScheduler::SchedBoundary::bumpCycle() { + unsigned Width = SchedModel->getIssueWidth(); + IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; + + unsigned NextCycle = CurrCycle + 1; + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + if (MinReadyCycle > NextCycle) { + IssueCount = 0; + NextCycle = MinReadyCycle; + } + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } + else { + // Bypass getHazardType calls in case of long latency. + for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); + } + } + CheckPending = true; + IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); + + DEBUG(dbgs() << " " << Available.getName() + << " Cycle: " << CurrCycle << '\n'); +} + +/// Add the given processor resource to this scheduled zone. +void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, + unsigned Cycles) { + unsigned Factor = SchedModel->getResourceFactor(PIdx); + DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name + << " +(" << Cycles << "x" << Factor + << ") / " << SchedModel->getLatencyFactor() << '\n'); + + unsigned Count = Factor * Cycles; + ResourceCounts[PIdx] += Count; + assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); + Rem->RemainingCounts[PIdx] -= Count; + + // Check if this resource exceeds the current critical resource by a full + // cycle. If so, it becomes the critical resource. + if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) + >= (int)SchedModel->getLatencyFactor()) { + CritResIdx = PIdx; + DEBUG(dbgs() << " *** Critical resource " + << SchedModel->getProcResource(PIdx)->Name << " x" + << ResourceCounts[PIdx] << '\n'); + } +} + +/// Move the boundary of scheduled code by one SUnit. +void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + // Update resource counts and critical resource. 
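+  // Counts are kept in normalized units: countResource() adds
+  // Cycles * getResourceFactor(PIdx), e.g. 2 cycles of a unit with factor 3
+  // contribute 6 units, making different resource kinds comparable.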
+  if (SchedModel->hasInstrSchedModel()) {
+    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      countResource(PI->ProcResourceIdx, PI->Cycles);
+    }
+  }
+  if (isTop()) {
+    if (SU->getDepth() > ExpectedLatency)
+      ExpectedLatency = SU->getDepth();
+  }
+  else {
+    if (SU->getHeight() > ExpectedLatency)
+      ExpectedLatency = SU->getHeight();
+  }
+
+  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+
+  // Check the instruction group dispatch limit.
+  // TODO: Check if this SU must end a dispatch group.
+  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+
+  // checkHazard prevents scheduling multiple instructions per cycle that
+  // exceed issue width. However, we commonly reach the maximum. In this case
+  // opportunistically bump the cycle to avoid uselessly checking everything in
+  // the readyQ. Furthermore, a single instruction may produce more than one
+  // cycle's worth of micro-ops.
+  if (IssueCount >= SchedModel->getIssueWidth()) {
+    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
+    bumpCycle();
+  }
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void ConvergingScheduler::SchedBoundary::releasePending() {
+  // If the available queue is empty, it is safe to reset MinReadyCycle.
+  if (Available.empty())
+    MinReadyCycle = UINT_MAX;
+
+  // Check to see if any of the pending instructions are ready to issue. If
+  // so, add them to the available queue.
+  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+    SUnit *SU = *(Pending.begin()+i);
+    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+    if (ReadyCycle < MinReadyCycle)
+      MinReadyCycle = ReadyCycle;
+
+    if (ReadyCycle > CurrCycle)
+      continue;
+
+    if (checkHazard(SU))
+      continue;
+
+    Available.push(SU);
+    Pending.remove(Pending.begin()+i);
+    --i; --e;
+  }
+  DEBUG(if (!Pending.empty()) Pending.dump());
+  CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
+  if (Available.isInQueue(SU))
+    Available.remove(Available.find(SU));
+  else {
+    assert(Pending.isInQueue(SU) && "bad ready count");
+    Pending.remove(Pending.find(SU));
+  }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// defer any nodes that now hit a hazard, and advance the cycle until at least
+/// one node is ready. If multiple instructions are ready, return NULL.
+SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
+  if (CheckPending)
+    releasePending();
+
+  if (IssueCount > 0) {
+    // Defer any ready instrs that now have a hazard.
+    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+      if (checkHazard(*I)) {
+        Pending.push(*I);
+        I = Available.remove(I);
+        continue;
+      }
+      ++I;
+    }
+  }
+  for (unsigned i = 0; Available.empty(); ++i) {
+    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+           "permanent hazard"); (void)i;
+    bumpCycle();
+    releasePending();
+  }
+  if (Available.size() == 1)
+    return *Available.begin();
+  return NULL;
+}
+
+/// Record the candidate policy for opposite zones with different critical
+/// resources.
+///
+/// If the CriticalZone is latency limited, don't force a policy for the
+/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
+void ConvergingScheduler::balanceZones(
+  ConvergingScheduler::SchedBoundary &CriticalZone,
+  ConvergingScheduler::SchedCandidate &CriticalCand,
+  ConvergingScheduler::SchedBoundary &OppositeZone,
+  ConvergingScheduler::SchedCandidate &OppositeCand) {
+
+  if (!CriticalZone.IsResourceLimited)
+    return;
+  assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
+
+  SchedRemainder *Rem = CriticalZone.Rem;
+
+  // If the critical zone is overconsuming a resource relative to the
+  // remainder, try to reduce it.
+  unsigned RemainingCritCount =
+    Rem->RemainingCounts[CriticalZone.CritResIdx];
+  if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
+      > (int)SchedModel->getLatencyFactor()) {
+    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
+    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
+          << '\n');
+  }
+  // If the other zone is underconsuming a resource relative to the full zone,
+  // try to increase it.
+  unsigned OppositeCount =
+    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
+  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
+      > (int)SchedModel->getLatencyFactor()) {
+    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
+    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
+          << '\n');
+  }
+}
+
+/// Determine if the scheduled zones exceed resource limits or critical path
+/// and set each candidate's ReduceHeight policy accordingly.
+void ConvergingScheduler::checkResourceLimits(
+  ConvergingScheduler::SchedCandidate &TopCand,
+  ConvergingScheduler::SchedCandidate &BotCand) {
+
+  // Set ReduceLatency to true if needed.
+  Bot.setLatencyPolicy(BotCand.Policy);
+  Top.setLatencyPolicy(TopCand.Policy);
+
+  // Handle resource-limited regions.
+  if (Top.IsResourceLimited && Bot.IsResourceLimited
+      && Top.CritResIdx == Bot.CritResIdx) {
+    // If the scheduled critical resource in both zones is no longer the
+    // critical remaining resource, attempt to reduce resource height both
+    // ways.
+    if (Top.CritResIdx != Rem.CritResIdx) {
+      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
+      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
+      DEBUG(dbgs() << "Reduce scheduled "
+            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
+    }
+    return;
+  }
+  // Handle latency-limited regions.
+  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
+    // If the total scheduled expected latency exceeds the region's critical
+    // path then reduce latency both ways.
+    //
+    // Just because a zone is not resource limited does not mean it is latency
+    // limited. Unbuffered resources, such as max micro-ops, may cause
+    // CurrCycle to exceed expected latency.
+    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
+        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
+      TopCand.Policy.ReduceLatency = true;
+      BotCand.Policy.ReduceLatency = true;
+      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+            << " + " << Bot.ExpectedLatency << '\n');
+    }
+    return;
+  }
+  // The critical resource is different in each zone, so request balancing.
+
+  // Compute the cost of each zone.
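+  // Zone cost is max(critical resource count, max(latency, cycles) scaled by
+  // the latency factor); e.g. ExpectedLatency = 8, CurrCycle = 6,
+  // LatencyFactor = 2, and a critical count of 20 give max(20, 8 * 2) = 20,
+  // a resource-bound zone.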
+  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
+  Top.ExpectedCount = std::max(
+    Top.getCriticalCount(),
+    Top.ExpectedCount * SchedModel->getLatencyFactor());
+  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
+  Bot.ExpectedCount = std::max(
+    Bot.getCriticalCount(),
+    Bot.ExpectedCount * SchedModel->getLatencyFactor());
+
+  balanceZones(Top, TopCand, Bot, BotCand);
+  balanceZones(Bot, BotCand, Top, TopCand);
+}
+
+void ConvergingScheduler::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+                  const TargetSchedModel *SchedModel) {
+  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+    return;
+
+  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  for (TargetSchedModel::ProcResIter
+         PI = SchedModel->getWriteProcResBegin(SC),
+         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+      ResDelta.CritResources += PI->Cycles;
+    if (PI->ProcResourceIdx == Policy.DemandResIdx)
+      ResDelta.DemandedResources += PI->Cycles;
+  }
+}
+
+/// Return true if this heuristic determines order.
+static bool tryLess(int TryVal, int CandVal,
+                    ConvergingScheduler::SchedCandidate &TryCand,
+                    ConvergingScheduler::SchedCandidate &Cand,
+                    ConvergingScheduler::CandReason Reason) {
+  if (TryVal < CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal > CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  return false;
+}
+
+static bool tryGreater(int TryVal, int CandVal,
+                       ConvergingScheduler::SchedCandidate &TryCand,
+                       ConvergingScheduler::SchedCandidate &Cand,
+                       ConvergingScheduler::CandReason Reason) {
+  if (TryVal > CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal < CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  return false;
+}
+
+static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model
+/// because we don't need to evaluate all aspects of the model for each node
+/// in the queue. But it's really done to make the heuristics easier to debug
+/// and statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending.
+/// \param RPTracker describes reg pressure within the scheduled zone.
+/// \param TempTracker is a scratch pressure tracker to reuse in queries.
+void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
+                                       SchedCandidate &TryCand,
+                                       SchedBoundary &Zone,
+                                       const RegPressureTracker &RPTracker,
+                                       RegPressureTracker &TempTracker) {
+
+  // Always initialize TryCand's RPDelta.
+  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
+                                  DAG->getRegionCriticalPSets(),
+                                  DAG->getRegPressure().MaxSetPressure);
+
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return;
+  }
+  // Avoid exceeding the target's limit.
+  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
+              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
+    return;
+  if (Cand.Reason == SingleExcess)
+    Cand.Reason = MultiPressure;
+
+  // Avoid increasing the max critical pressure in the scheduled region.
+ if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease, + Cand.RPDelta.CriticalMax.UnitIncrease, + TryCand, Cand, SingleCritical)) + return; + if (Cand.Reason == SingleCritical) + Cand.Reason = MultiPressure; + + // Keep clustered nodes together to encourage downstream peephole + // optimizations which may reduce resource requirements. + // + // This is a best effort to set things up for a post-RA pass. Optimizations + // like generating loads of multiple registers should ideally be done within + // the scheduler pass by combining the loads during DAG postprocessing. + const SUnit *NextClusterSU = + Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU, + TryCand, Cand, Cluster)) + return; + // Currently, weak edges are for clustering, so we hard-code that reason. + // However, deferring the current TryCand will not change Cand's reason. + CandReason OrigReason = Cand.Reason; + if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), + getWeakLeft(Cand.SU, Zone.isTop()), + TryCand, Cand, Cluster)) { + Cand.Reason = OrigReason; + return; + } + // Avoid critical resource consumption and balance the schedule. + TryCand.initResourceDelta(DAG, SchedModel); + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, + TryCand, Cand, ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + if (Cand.Policy.ReduceLatency) { + if (Zone.isTop()) { + if (Cand.SU->getDepth() * SchedModel->getLatencyFactor() + > Zone.ExpectedCount) { + if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, TopDepthReduce)) + return; + } + if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, TopPathReduce)) + return; + } + else { + if (Cand.SU->getHeight() * SchedModel->getLatencyFactor() + > Zone.ExpectedCount) { + if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, BotHeightReduce)) + return; + } + if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, BotPathReduce)) + return; + } + } + + // Avoid increasing the max pressure of the entire region. + if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease, + Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax)) + return; + if (Cand.Reason == SingleMax) + Cand.Reason = MultiPressure; + + // Prefer immediate defs/users of the last scheduled instruction. This is a + // nice pressure avoidance strategy that also conserves the processor's + // register renaming resources and keeps the machine code readable. + if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), + TryCand, Cand, NextDefUse)) + return; + + // Fall through to original instruction order. + if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) + || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { + TryCand.Reason = NodeOrder; + } +} + +/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is +/// more desirable than RHS from scheduling standpoint. +static bool compareRPDelta(const RegPressureDelta &LHS, + const RegPressureDelta &RHS) { + // Compare each component of pressure in decreasing order of importance + // without checking if any are valid. Invalid PressureElements are assumed to + // have UnitIncrease==0, so are neutral. 
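+  // For example, if the top candidate increases Excess by 0 units and the
+  // bottom candidate by 2, the top candidate wins at the first check below.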
+ + // Avoid increasing the max critical pressure in the scheduled region. + if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { + DEBUG(dbgs() << "RP excess top - bot: " + << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); + return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; + } + // Avoid increasing the max critical pressure in the scheduled region. + if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { + DEBUG(dbgs() << "RP critical top - bot: " + << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) + << '\n'); + return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; + } + // Avoid increasing the max pressure of the entire region. + if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { + DEBUG(dbgs() << "RP current top - bot: " + << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) + << '\n'); + return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; + } + return false; +} + +#ifndef NDEBUG +const char *ConvergingScheduler::getReasonStr( + ConvergingScheduler::CandReason Reason) { + switch (Reason) { + case NoCand: return "NOCAND "; + case SingleExcess: return "REG-EXCESS"; + case SingleCritical: return "REG-CRIT "; + case Cluster: return "CLUSTER "; + case SingleMax: return "REG-MAX "; + case MultiPressure: return "REG-MULTI "; + case ResourceReduce: return "RES-REDUCE"; + case ResourceDemand: return "RES-DEMAND"; + case TopDepthReduce: return "TOP-DEPTH "; + case TopPathReduce: return "TOP-PATH "; + case BotHeightReduce:return "BOT-HEIGHT"; + case BotPathReduce: return "BOT-PATH "; + case NextDefUse: return "DEF-USE "; + case NodeOrder: return "ORDER "; + }; + llvm_unreachable("Unknown reason!"); +} + +void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { + PressureElement P; + unsigned ResIdx = 0; + unsigned Latency = 0; + switch (Cand.Reason) { + default: + break; + case SingleExcess: + P = Cand.RPDelta.Excess; + break; + case SingleCritical: + P = Cand.RPDelta.CriticalMax; + break; + case SingleMax: + P = Cand.RPDelta.CurrentMax; + break; + case ResourceReduce: + ResIdx = Cand.Policy.ReduceResIdx; + break; + case ResourceDemand: + ResIdx = Cand.Policy.DemandResIdx; + break; + case TopDepthReduce: + Latency = Cand.SU->getDepth(); + break; + case TopPathReduce: + Latency = Cand.SU->getHeight(); + break; + case BotHeightReduce: + Latency = Cand.SU->getHeight(); + break; + case BotPathReduce: + Latency = Cand.SU->getDepth(); + break; + } + dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); + if (P.isValid()) + dbgs() << " " << TRI->getRegPressureSetName(P.PSetID) + << ":" << P.UnitIncrease << " "; + else + dbgs() << " "; + if (ResIdx) + dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; + else + dbgs() << " "; + if (Latency) + dbgs() << " " << Latency << " cycles "; + else + dbgs() << " "; + dbgs() << '\n'; +} +#endif + +/// Pick the best candidate from the top queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Cand) { + ReadyQueue &Q = Zone.Available; + + DEBUG(Q.dump()); + + // getMaxPressureDelta temporarily modifies the tracker. 
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + + SchedCandidate TryCand(Cand.Policy); + TryCand.SU = *I; + tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker); + if (TryCand.Reason != NoCand) { + // Initialize resource delta if needed in case future heuristics query it. + if (TryCand.ResDelta == SchedResourceDelta()) + TryCand.initResourceDelta(DAG, SchedModel); + Cand.setBest(TryCand); + DEBUG(traceCandidate(Cand)); + } + } +} + +static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, + bool IsTop) { + DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot") + << " SU(" << Cand.SU->NodeNum << ") " + << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + CandPolicy NoPolicy; + SchedCandidate BotCand(NoPolicy); + SchedCandidate TopCand(NoPolicy); + checkResourceLimits(TopCand, BotCand); + + // Prefer bottom scheduling when heuristics are silent. + pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. + if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) { + IsTopNode = false; + tracePick(BotCand, IsTopNode); + return BotCand.SU; + } + // Check if the top Q has a better candidate. + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that minimizes pressure above the + // original region's pressure pick it. + if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) { + if (TopCand.Reason < BotCand.Reason) { + IsTopNode = true; + tracePick(TopCand, IsTopNode); + return TopCand.SU; + } + IsTopNode = false; + tracePick(BotCand, IsTopNode); + return BotCand.SU; + } + // Check for a salient pressure difference and pick the best from either side. + if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) { + IsTopNode = true; + tracePick(TopCand, IsTopNode); + return TopCand.SU; + } + // Otherwise prefer the bottom candidate, in node order if all else failed. + if (TopCand.Reason < BotCand.Reason) { + IsTopNode = true; + tracePick(TopCand, IsTopNode); + return TopCand.SU; + } + IsTopNode = false; + tracePick(BotCand, IsTopNode); + return BotCand.SU; +} + +/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 
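+/// Honors -misched-topdown and -misched-bottomup when one is forced;
+/// otherwise picks bidirectionally, preferring the bottom queue whenever the
+/// heuristics are otherwise silent.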
+SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
+  if (DAG->top() == DAG->bottom()) {
+    assert(Top.Available.empty() && Top.Pending.empty() &&
+           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+    return NULL;
+  }
+  SUnit *SU;
+  do {
+    if (ForceTopDown) {
+      SU = Top.pickOnlyChoice();
+      if (!SU) {
+        CandPolicy NoPolicy;
+        SchedCandidate TopCand(NoPolicy);
+        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+        SU = TopCand.SU;
+      }
+      IsTopNode = true;
+    }
+    else if (ForceBottomUp) {
+      SU = Bot.pickOnlyChoice();
+      if (!SU) {
+        CandPolicy NoPolicy;
+        SchedCandidate BotCand(NoPolicy);
+        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+        SU = BotCand.SU;
+      }
+      IsTopNode = false;
+    }
+    else {
+      SU = pickNodeBidirectional(IsTopNode);
+    }
+  } while (SU->isScheduled);
+
+  if (SU->isTopReady())
+    Top.removeReady(SU);
+  if (SU->isBottomReady())
+    Bot.removeReady(SU);
+
+  DEBUG(dbgs() << "Scheduling " << *SU->getInstr());
+  return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, ScheduleDAGMI needs to
+/// update its state based on the current cycle before MachineSchedStrategy
+/// does.
+void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+  if (IsTopNode) {
+    SU->TopReadyCycle = Top.CurrCycle;
+    Top.bumpNode(SU);
+  }
+  else {
+    SU->BotReadyCycle = Bot.CurrCycle;
+    Bot.bumpNode(SU);
+  }
+}
+
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+  assert((!ForceTopDown || !ForceBottomUp) &&
+         "-misched-topdown incompatible with -misched-bottomup");
+  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
+  // Register DAG post-processors.
+  if (EnableLoadCluster)
+    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
+  if (EnableMacroFusion)
+    DAG->addMutation(new MacroFusion(DAG->TII));
+  return DAG;
+}
+static MachineSchedRegistry
+ConvergingSchedRegistry("converge", "Standard converging scheduler.",
+                        createConvergingSched);
+
+//===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+  const SchedDFSResult *DFSResult;
+  const BitVector *ScheduledTrees;
+  bool MaximizeILP;
+
+  ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
+
+  /// \brief Apply a less-than relation on node priority.
+  ///
+  /// (Return true if A comes after B in the Q.)
+  bool operator()(const SUnit *A, const SUnit *B) const {
+    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
+    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
+    if (SchedTreeA != SchedTreeB) {
+      // Unscheduled trees have lower priority.
+      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
+        return ScheduledTrees->test(SchedTreeB);
+
+      // Trees with shallower connections have lower priority.
+ if (DFSResult->getSubtreeLevel(SchedTreeA) + != DFSResult->getSubtreeLevel(SchedTreeB)) { + return DFSResult->getSubtreeLevel(SchedTreeA) + < DFSResult->getSubtreeLevel(SchedTreeB); + } + } + if (MaximizeILP) + return DFSResult->getILP(A) < DFSResult->getILP(B); + else + return DFSResult->getILP(A) > DFSResult->getILP(B); + } +}; + +/// \brief Schedule based on the ILP metric. +class ILPScheduler : public MachineSchedStrategy { + /// In case all subtrees are eventually connected to a common root through + /// data dependence (e.g. reduction), place an upper limit on their size. + /// + /// FIXME: A subtree limit is generally good, but in the situation commented + /// above, where multiple similar subtrees feed a common root, we should + /// only split at a point where the resulting subtrees will be balanced. + /// (a motivating test case must be found). + static const unsigned SubtreeLimit = 16; + + ScheduleDAGMI *DAG; + ILPOrder Cmp; + + std::vector<SUnit*> ReadyQ; +public: + ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {} + + virtual void initialize(ScheduleDAGMI *dag) { + DAG = dag; + DAG->computeDFSResult(); + Cmp.DFSResult = DAG->getDFSResult(); + Cmp.ScheduledTrees = &DAG->getScheduledTrees(); + ReadyQ.clear(); + } + + virtual void registerRoots() { + // Restore the heap in ReadyQ with the updated DFS results. + std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } + + /// Implement MachineSchedStrategy interface. + /// ----------------------------------------- + + /// Callback to select the highest priority node from the ready Q. + virtual SUnit *pickNode(bool &IsTopNode) { + if (ReadyQ.empty()) return NULL; + std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + SUnit *SU = ReadyQ.back(); + ReadyQ.pop_back(); + IsTopNode = false; + DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): " + << *SU->getInstr() + << " ILP: " << DAG->getDFSResult()->getILP(SU) + << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" + << DAG->getDFSResult()->getSubtreeLevel( + DAG->getDFSResult()->getSubtreeID(SU)) << '\n'); + return SU; + } + + /// \brief Scheduler callback to notify that a new subtree is scheduled. + virtual void scheduleTree(unsigned SubtreeID) { + std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } + + /// Callback after a node is scheduled. Mark a newly scheduled tree, notify + /// DFSResults, and resort the priority Q. 
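+  /// (The tree marking and queue resorting happen via scheduleTree(); this
+  /// hook only asserts the bottom-up invariant that SchedDFSResult requires.)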
+ virtual void schedNode(SUnit *SU, bool IsTopNode) { + assert(!IsTopNode && "SchedDFSResult needs bottom-up"); + } + + virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } + + virtual void releaseBottomNode(SUnit *SU) { + ReadyQ.push_back(SU); + std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } +}; +} // namespace + +static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new ILPScheduler(true)); +} +static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new ILPScheduler(false)); +} +static MachineSchedRegistry ILPMaxRegistry( + "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); +static MachineSchedRegistry ILPMinRegistry( + "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); + +//===----------------------------------------------------------------------===// +// Machine Instruction Shuffler for Correctness Testing +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +namespace { +/// Apply a less-than relation on the node order, which corresponds to the +/// instruction order prior to scheduling. IsReverse implements greater-than. +template<bool IsReverse> +struct SUnitOrder { + bool operator()(SUnit *A, SUnit *B) const { + if (IsReverse) + return A->NodeNum > B->NodeNum; + else + return A->NodeNum < B->NodeNum; + } +}; + +/// Reorder instructions as much as possible. +class InstructionShuffler : public MachineSchedStrategy { + bool IsAlternating; + bool IsTopDown; + + // Using a less-than relation (SUnitOrder<false>) for the TopQ priority + // gives nodes with a higher number higher priority causing the latest + // instructions to be scheduled first. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> > + TopQ; + // When scheduling bottom-up, use greater-than as the queue priority. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> > + BottomQ; +public: + InstructionShuffler(bool alternate, bool topdown) + : IsAlternating(alternate), IsTopDown(topdown) {} + + virtual void initialize(ScheduleDAGMI *) { + TopQ.clear(); + BottomQ.clear(); + } + + /// Implement MachineSchedStrategy interface. 
+ /// ----------------------------------------- + + virtual SUnit *pickNode(bool &IsTopNode) { + SUnit *SU; + if (IsTopDown) { + do { + if (TopQ.empty()) return NULL; + SU = TopQ.top(); + TopQ.pop(); + } while (SU->isScheduled); + IsTopNode = true; + } + else { + do { + if (BottomQ.empty()) return NULL; + SU = BottomQ.top(); + BottomQ.pop(); + } while (SU->isScheduled); + IsTopNode = false; + } + if (IsAlternating) + IsTopDown = !IsTopDown; + return SU; + } + + virtual void schedNode(SUnit *SU, bool IsTopNode) {} + + virtual void releaseTopNode(SUnit *SU) { + TopQ.push(SU); + } + virtual void releaseBottomNode(SUnit *SU) { + BottomQ.push(SU); + } +}; +} // namespace + +static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { + bool Alternate = !ForceTopDown && !ForceBottomUp; + bool TopDown = !ForceBottomUp; + assert((TopDown || !ForceTopDown) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown)); +} +static MachineSchedRegistry ShufflerRegistry( + "shuffle", "Shuffle machine instructions alternating directions", + createInstructionShuffler); +#endif // !NDEBUG + +//===----------------------------------------------------------------------===// +// GraphWriter support for ScheduleDAGMI. +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +namespace llvm { + +template<> struct GraphTraits< + ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; + +template<> +struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const ScheduleDAG *G) { + return G->MF.getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool isNodeHidden(const SUnit *Node) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } + + static bool hasNodeAddressLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + return false; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + static std::string getEdgeAttributes(const SUnit *Node, + SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + + static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { + std::string Str; + raw_string_ostream SS(Str); + SS << "SU(" << SU->NodeNum << ')'; + return SS.str(); + } + static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { + return G->getGraphNodeLabel(SU); + } + + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + std::string Str("shape=Mrecord"); + const SchedDFSResult *DFS = + static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult(); + if (DFS) { + Str += ",style=filled,fillcolor=\"#"; + Str += DOT::getColorString(DFS->getSubtreeID(N)); + Str += '"'; + } + return Str; + } +}; +} // namespace llvm +#endif // NDEBUG + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. 
+/// +void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { +#ifndef NDEBUG + ViewGraph(this, Name, false, Title); +#else + errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// Out-of-line implementation with no arguments is handy for gdb. +void ScheduleDAGMI::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp new file mode 100644 index 0000000..4dafbe5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -0,0 +1,712 @@ +//===-- MachineSink.cpp - Sinking for machine instructions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass moves instructions into successor blocks when possible, so that +// they aren't executed on paths where their results aren't needed. +// +// This pass is not intended to be a replacement or a complete alternative +// for an LLVM-IR-level sinking pass. It is only designed to sink simple +// constructs that are not exposed before lowering and instruction selection. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-sink" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +static cl::opt<bool> +SplitEdges("machine-sink-split", + cl::desc("Split critical edges during machine sinking"), + cl::init(true), cl::Hidden); + +STATISTIC(NumSunk, "Number of machine instructions sunk"); +STATISTIC(NumSplit, "Number of critical edges split"); +STATISTIC(NumCoalesces, "Number of copies coalesced"); + +namespace { + class MachineSinking : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; // Machine register information + MachineDominatorTree *DT; // Machine dominator tree + MachineLoopInfo *LI; + AliasAnalysis *AA; + + // Remember which edges have been considered for breaking. 
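+    // Seeing the same edge again in isWorthBreakingCriticalEdge() means more
+    // than one instruction wants to sink along it, which tips the heuristic
+    // toward actually splitting (see the comment in that function).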
+ SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8> + CEBCandidates; + + public: + static char ID; // Pass identification + MachineSinking() : MachineFunctionPass(ID) { + initializeMachineSinkingPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + } + + virtual void releaseMemory() { + CEBCandidates.clear(); + } + + private: + bool ProcessBlock(MachineBasicBlock &MBB); + bool isWorthBreakingCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To); + MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To, + bool BreakPHIEdge); + bool SinkInstruction(MachineInstr *MI, bool &SawStore); + bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, + bool &BreakPHIEdge, bool &LocalUse) const; + MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB, + bool &BreakPHIEdge); + bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo); + + bool PerformTrivialForwardCoalescing(MachineInstr *MI, + MachineBasicBlock *MBB); + }; + + // SuccessorSorter - Sort Successors according to their loop depth. + struct SuccessorSorter { + SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {} + bool operator()(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) const { + return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + } + MachineLoopInfo *LI; + }; +} // end anonymous namespace + +char MachineSinking::ID = 0; +char &llvm::MachineSinkingID = MachineSinking::ID; +INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", + "Machine code sinking", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MachineSinking, "machine-sink", + "Machine code sinking", false, false) + +bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI, + MachineBasicBlock *MBB) { + if (!MI->isCopy()) + return false; + + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || + !TargetRegisterInfo::isVirtualRegister(DstReg) || + !MRI->hasOneNonDBGUse(SrcReg)) + return false; + + const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); + const TargetRegisterClass *DRC = MRI->getRegClass(DstReg); + if (SRC != DRC) + return false; + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->isCopyLike()) + return false; + DEBUG(dbgs() << "Coalescing: " << *DefMI); + DEBUG(dbgs() << "*** to: " << *MI); + MRI->replaceRegWith(DstReg, SrcReg); + MI->eraseFromParent(); + ++NumCoalesces; + return true; +} + +/// AllUsesDominatedByBlock - Return true if all uses of the specified register +/// occur in blocks dominated by the specified block. If any use is in the +/// definition block, then return false since it is never legal to move def +/// after uses. 
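+/// Note that a PHI use counts as occurring in the corresponding predecessor
+/// block, not in the block containing the PHI itself.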
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+                                        MachineBasicBlock *MBB,
+                                        MachineBasicBlock *DefMBB,
+                                        bool &BreakPHIEdge,
+                                        bool &LocalUse) const {
+  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+         "Only makes sense for vregs");
+
+  // Ignore debug uses because debug info doesn't affect the code.
+  if (MRI->use_nodbg_empty(Reg))
+    return true;
+
+  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
+  // into and they are all PHI nodes. In this case, machine-sink must break
+  // the critical edge first. e.g.
+  //
+  // BB#1: derived from LLVM BB %bb4.preheader
+  //   Predecessors according to CFG: BB#0
+  //     ...
+  //     %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+  //     ...
+  //     JE_4 <BB#37>, %EFLAGS<imp-use>
+  //   Successors according to CFG: BB#37 BB#2
+  //
+  // BB#2: derived from LLVM BB %bb.nph
+  //   Predecessors according to CFG: BB#0 BB#1
+  //     %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+  BreakPHIEdge = true;
+  for (MachineRegisterInfo::use_nodbg_iterator
+         I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+       I != E; ++I) {
+    MachineInstr *UseInst = &*I;
+    MachineBasicBlock *UseBlock = UseInst->getParent();
+    if (!(UseBlock == MBB && UseInst->isPHI() &&
+          UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+      BreakPHIEdge = false;
+      break;
+    }
+  }
+  if (BreakPHIEdge)
+    return true;
+
+  for (MachineRegisterInfo::use_nodbg_iterator
+         I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+       I != E; ++I) {
+    // Determine the block of the use.
+    MachineInstr *UseInst = &*I;
+    MachineBasicBlock *UseBlock = UseInst->getParent();
+    if (UseInst->isPHI()) {
+      // PHI nodes use the operand in the predecessor block, not the block with
+      // the PHI.
+      UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+    } else if (UseBlock == DefMBB) {
+      LocalUse = true;
+      return false;
+    }
+
+    // Check that it dominates.
+    if (!DT->dominates(MBB, UseBlock))
+      return false;
+  }
+
+  return true;
+}
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "******** Machine Sinking ********\n");
+
+  const TargetMachine &TM = MF.getTarget();
+  TII = TM.getInstrInfo();
+  TRI = TM.getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
+  LI = &getAnalysis<MachineLoopInfo>();
+  AA = &getAnalysis<AliasAnalysis>();
+
+  bool EverMadeChange = false;
+
+  while (1) {
+    bool MadeChange = false;
+
+    // Process all basic blocks.
+    CEBCandidates.clear();
+    for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+         I != E; ++I)
+      MadeChange |= ProcessBlock(*I);
+
+    // If this iteration over the code changed anything, keep iterating.
+    if (!MadeChange) break;
+    EverMadeChange = true;
+  }
+  return EverMadeChange;
+}
+
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+  // Can't sink anything out of a block that has fewer than two successors.
+  if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+  // Don't bother sinking code out of unreachable blocks. In addition to being
+  // unprofitable, it can also lead to infinite looping, because in an
+  // unreachable loop there may be nowhere to stop.
+  if (!DT->isReachableFromEntry(&MBB)) return false;
+
+  bool MadeChange = false;
+
+  // Walk the basic block bottom-up.  Remember if we saw a store.
+  MachineBasicBlock::iterator I = MBB.end();
+  --I;
+  bool ProcessedBegin, SawStore = false;
+  do {
+    MachineInstr *MI = I;  // The instruction to sink.
+ + // Predecrement I (if it's not begin) so that it isn't invalidated by + // sinking. + ProcessedBegin = I == MBB.begin(); + if (!ProcessedBegin) + --I; + + if (MI->isDebugValue()) + continue; + + bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); + if (Joined) { + MadeChange = true; + continue; + } + + if (SinkInstruction(MI, SawStore)) + ++NumSunk, MadeChange = true; + + // If we just processed the first instruction in the block, we're done. + } while (!ProcessedBegin); + + return MadeChange; +} + +bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To) { + // FIXME: Need much better heuristics. + + // If the pass has already considered breaking this edge (during this pass + // through the function), then let's go ahead and break it. This means + // sinking multiple "cheap" instructions into the same block. + if (!CEBCandidates.insert(std::make_pair(From, To))) + return true; + + if (!MI->isCopy() && !MI->isAsCheapAsAMove()) + return true; + + // MI is cheap, we probably don't want to break the critical edge for it. + // However, if this would allow some definitions of its source operands + // to be sunk then it's probably worth it. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MRI->hasOneNonDBGUse(Reg)) + return true; + } + + return false; +} + +MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB, + bool BreakPHIEdge) { + if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) + return 0; + + // Avoid breaking back edge. From == To means backedge for single BB loop. + if (!SplitEdges || FromBB == ToBB) + return 0; + + // Check for backedges of more "complex" loops. + if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && + LI->isLoopHeader(ToBB)) + return 0; + + // It's not always legal to break critical edges and sink the computation + // to the edge. + // + // BB#1: + // v1024 + // Beq BB#3 + // <fallthrough> + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: + // + // BB#1: + // ... + // Bne BB#2 + // BB#4: + // v1024 = + // B BB#3 + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 + // flow. We need to ensure the new basic block where the computation is + // sunk to dominates all the uses. + // It's only legal to break critical edge and sink the computation to the + // new block if all the predecessors of "To", except for "From", are + // not dominated by "From". Given SSA property, this means these + // predecessors are dominated by "To". + // + // There is no need to do this check if all the uses are PHI nodes. PHI + // sources are only defined on the specific predecessor edges. 
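+  // Illustratively: after splitting FromBB->ToBB into FromBB->NewBB->ToBB,
+  // the sunk value is computed only in NewBB, so every other predecessor P of
+  // ToBB must itself be dominated by ToBB (checked below); otherwise some use
+  // reachable through P could bypass the computation.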
+  if (!BreakPHIEdge) {
+    for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+           E = ToBB->pred_end(); PI != E; ++PI) {
+      if (*PI == FromBB)
+        continue;
+      if (!DT->dominates(ToBB, *PI))
+        return 0;
+    }
+  }
+
+  return FromBB->SplitCriticalEdge(ToBB, this);
+}
+
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+  return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
+}
+
+/// collectDebugValues - Scan instructions following MI and collect any
+/// matching DBG_VALUEs.
+static void collectDebugValues(MachineInstr *MI,
+                               SmallVector<MachineInstr *, 2> &DbgValues) {
+  DbgValues.clear();
+  if (!MI->getOperand(0).isReg())
+    return;
+
+  MachineBasicBlock::iterator DI = MI; ++DI;
+  for (MachineBasicBlock::iterator DE = MI->getParent()->end();
+       DI != DE; ++DI) {
+    if (!DI->isDebugValue())
+      return;
+    if (DI->getOperand(0).isReg() &&
+        DI->getOperand(0).getReg() == MI->getOperand(0).getReg())
+      DbgValues.push_back(DI);
+  }
+}
+
+/// isPostDominatedBy - Return true if A is post-dominated by B.
+static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) {
+
+  // FIXME - Use a real post-dominator analysis.
+  if (A->succ_size() != 2)
+    return false;
+  MachineBasicBlock::succ_iterator I = A->succ_begin();
+  if (B == *I)
+    ++I;
+  MachineBasicBlock *OtherSuccBlock = *I;
+  if (OtherSuccBlock->succ_size() != 1 ||
+      *(OtherSuccBlock->succ_begin()) != B)
+    return false;
+
+  return true;
+}
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+                                          MachineBasicBlock *MBB,
+                                          MachineBasicBlock *SuccToSinkTo) {
+  assert(MI && "Invalid MachineInstr!");
+  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+  if (MBB == SuccToSinkTo)
+    return false;
+
+  // It is profitable if SuccToSinkTo does not post-dominate the current block.
+  if (!isPostDominatedBy(MBB, SuccToSinkTo))
+    return true;
+
+  // Check whether the uses in the post-dominated block are all PHI
+  // instructions.
+  bool NonPHIUse = false;
+  for (MachineRegisterInfo::use_nodbg_iterator
+         I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+       I != E; ++I) {
+    MachineInstr *UseInst = &*I;
+    MachineBasicBlock *UseBlock = UseInst->getParent();
+    if (UseBlock == SuccToSinkTo && !UseInst->isPHI())
+      NonPHIUse = true;
+  }
+  if (!NonPHIUse)
+    return true;
+
+  // Even if SuccToSinkTo post-dominates MBB, sinking may still be profitable
+  // if MI can be profitably sunk further into another block in the next round.
+  bool BreakPHIEdge = false;
+  // FIXME - If finding the successor is compile-time expensive, cache results.
+  if (MachineBasicBlock *MBB2 =
+        FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge))
+    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2);
+
+  // If SuccToSinkTo is the final destination and it post-dominates the current
+  // block, then it is not profitable to sink MI into SuccToSinkTo.
+  return false;
+}
+
+/// FindSuccToSinkTo - Find a successor to sink this instruction to.
+MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
+                                                    MachineBasicBlock *MBB,
+                                                    bool &BreakPHIEdge) {
+
+  assert(MI && "Invalid MachineInstr!");
+  assert(MBB && "Invalid MachineBasicBlock!");
+
+  // Loop over all the operands of the specified instruction.  If there is
+  // anything we can't handle, bail out.
+
+  // SuccToSinkTo - This is the successor to sink this instruction to, once we
+  // decide.
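+  // In brief: a physreg use is only tolerated when the register reads as a
+  // constant, a physreg def must be dead, and each virtual register def needs
+  // some successor block that dominates all of its uses.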
+ MachineBasicBlock *SuccToSinkTo = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; // Ignore non-register operands. + + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + return NULL; + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. + return NULL; + } + } else { + // Virtual register uses are always safe to sink. + if (MO.isUse()) continue; + + // If it's not safe to move defs of the register class, then abort. + if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) + return NULL; + + // FIXME: This picks a successor to sink into based on having one + // successor that dominates all the uses. However, there are cases where + // sinking can happen but where the sink point isn't a successor. For + // example: + // + // x = computation + // if () {} else {} + // use x + // + // the instruction could be sunk over the whole diamond for the + // if/then/else (or loop, etc), allowing it to be sunk into other blocks + // after that. + + // Virtual register defs can only be sunk if all their uses are in blocks + // dominated by one of the successors. + if (SuccToSinkTo) { + // If a previous operand picked a block to sink to, then this operand + // must be sinkable to the same block. + bool LocalUse = false; + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, + BreakPHIEdge, LocalUse)) + return NULL; + + continue; + } + + // Otherwise, we should look at all the successors and decide which one + // we should sink to. + // We give successors with smaller loop depth higher priority. + SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); + std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); + for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { + MachineBasicBlock *SuccBlock = *SI; + bool LocalUse = false; + if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, + BreakPHIEdge, LocalUse)) { + SuccToSinkTo = SuccBlock; + break; + } + if (LocalUse) + // Def is used locally, it's never safe to move this def. + return NULL; + } + + // If we couldn't find a block to sink to, ignore this instruction. + if (SuccToSinkTo == 0) + return NULL; + else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo)) + return NULL; + } + } + + // It is not possible to sink an instruction into its own block. This can + // happen with loops. + if (MBB == SuccToSinkTo) + return NULL; + + // It's not safe to sink instructions to EH landing pad. Control flow into + // landing pad is implicitly defined. + if (SuccToSinkTo && SuccToSinkTo->isLandingPad()) + return NULL; + + return SuccToSinkTo; +} + +/// SinkInstruction - Determine whether it is safe to sink the specified machine +/// instruction out of its current block into a successor. +bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { + // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to + // be close to the source to make it easier to coalesce. + if (AvoidsSinking(MI, MRI)) + return false; + + // Check if it's safe to move the instruction. 
+  if (!MI->isSafeToMove(TII, AA, SawStore))
+    return false;
+
+  // FIXME: This should include support for sinking instructions within the
+  // block they are currently in to shorten the live ranges.  We often get
+  // instructions sunk into the top of a large block, but it would be better to
+  // also sink them down before their first use in the block.  This xform has
+  // to be careful not to *increase* register pressure though, e.g. sinking
+  // "x = y + z" down if it kills y and z would increase the live ranges of y
+  // and z and only shrink the live range of x.
+
+  bool BreakPHIEdge = false;
+  MachineBasicBlock *ParentBlock = MI->getParent();
+  MachineBasicBlock *SuccToSinkTo =
+    FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);
+
+  // If there are no outputs, it must have side-effects.
+  if (SuccToSinkTo == 0)
+    return false;
+
+  // If the instruction to move defines a dead physical register which is live
+  // when leaving the basic block, don't move it because it could turn into a
+  // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
+  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI->getOperand(I);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+    if (SuccToSinkTo->isLiveIn(Reg))
+      return false;
+  }
+
+  DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+
+  // If the block has multiple predecessors, this would introduce computation
+  // on a path where it doesn't already exist. We could split the critical
+  // edge, but for now we just punt.
+  if (SuccToSinkTo->pred_size() > 1) {
+    // We cannot sink a load across a critical edge - there may be stores in
+    // other code paths.
+    bool TryBreak = false;
+    bool store = true;
+    if (!MI->isSafeToMove(TII, AA, store)) {
+      DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+      TryBreak = true;
+    }
+
+    // We don't want to sink across a critical edge if we don't dominate the
+    // successor. We could be introducing calculations to new code paths.
+    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+      TryBreak = true;
+    }
+
+    // Don't sink instructions into a loop.
+    if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+      TryBreak = true;
+    }
+
+    // Otherwise we are OK with sinking along a critical edge.
+    if (!TryBreak)
+      DEBUG(dbgs() << "Sinking along critical edge.\n");
+    else {
+      MachineBasicBlock *NewSucc =
+        SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+      if (!NewSucc) {
+        DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+                        "break critical edge\n");
+        return false;
+      } else {
+        DEBUG(dbgs() << " *** Splitting critical edge:"
+                        " BB#" << ParentBlock->getNumber()
+                     << " -- BB#" << NewSucc->getNumber()
+                     << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+        SuccToSinkTo = NewSucc;
+        ++NumSplit;
+        BreakPHIEdge = false;
+      }
+    }
+  }
+
+  if (BreakPHIEdge) {
+    // BreakPHIEdge is true if all the uses are in the successor MBB being
+    // sunk into and they are all PHI nodes. In this case, machine-sink must
+    // break the critical edge first.
+ MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock, + SuccToSinkTo, BreakPHIEdge); + if (!NewSucc) { + DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " + "break critical edge\n"); + return false; + } + + DEBUG(dbgs() << " *** Splitting critical edge:" + " BB#" << ParentBlock->getNumber() + << " -- BB#" << NewSucc->getNumber() + << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); + SuccToSinkTo = NewSucc; + ++NumSplit; + } + + // Determine where to insert into. Skip phi nodes. + MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); + while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) + ++InsertPos; + + // collect matching debug values. + SmallVector<MachineInstr *, 2> DbgValuesToSink; + collectDebugValues(MI, DbgValuesToSink); + + // Move the instruction. + SuccToSinkTo->splice(InsertPos, ParentBlock, MI, + ++MachineBasicBlock::iterator(MI)); + + // Move debug values. + for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(), + DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { + MachineInstr *DbgMI = *DBI; + SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI, + ++MachineBasicBlock::iterator(DbgMI)); + } + + // Conservatively, clear any kill flags, since it's possible that they are no + // longer correct. + MI->clearKillInfo(); + + return true; +} diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp new file mode 100644 index 0000000..49d8c4e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -0,0 +1,1290 @@ +//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-trace-metrics" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +char MachineTraceMetrics::ID = 0; +char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; + +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) + +MachineTraceMetrics::MachineTraceMetrics() + : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) { + std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0); +} + +void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + TII = MF->getTarget().getInstrInfo(); + TRI = MF->getTarget().getRegisterInfo(); + MRI = &MF->getRegInfo(); + Loops = &getAnalysis<MachineLoopInfo>(); + const TargetSubtargetInfo &ST = + MF->getTarget().getSubtarget<TargetSubtargetInfo>(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); + BlockInfo.resize(MF->getNumBlockIDs()); + ProcResourceCycles.resize(MF->getNumBlockIDs() * + SchedModel.getNumProcResourceKinds()); + return false; +} + +void MachineTraceMetrics::releaseMemory() { + MF = 0; + BlockInfo.clear(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) { + delete Ensembles[i]; + Ensembles[i] = 0; + } +} + +//===----------------------------------------------------------------------===// +// Fixed block information +//===----------------------------------------------------------------------===// +// +// The number of instructions in a basic block and the CPU resources used by +// those instructions don't depend on any given trace strategy. + +/// Compute the resource usage in basic block MBB. +const MachineTraceMetrics::FixedBlockInfo* +MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { + assert(MBB && "No basic block"); + FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()]; + if (FBI->hasResources()) + return FBI; + + // Compute resource usage in the block. + FBI->HasCalls = false; + unsigned InstrCount = 0; + + // Add up per-processor resource cycles as well. + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + SmallVector<unsigned, 32> PRCycles(PRKinds); + + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *MI = I; + if (MI->isTransient()) + continue; + ++InstrCount; + if (MI->isCall()) + FBI->HasCalls = true; + + // Count processor resources used. 
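+    // Each resolved SchedClass lists (ProcResourceIdx, Cycles) pairs; the
+    // per-block sums accumulated here are scaled by getResourceFactor() after
+    // the loop so that different resource kinds are directly comparable.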
+ if (!SchedModel.hasInstrSchedModel()) + continue; + const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI); + if (!SC->isValid()) + continue; + + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind"); + PRCycles[PI->ProcResourceIdx] += PI->Cycles; + } + } + FBI->InstrCount = InstrCount; + + // Scale the resource cycles so they are comparable. + unsigned PROffset = MBB->getNumber() * PRKinds; + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceCycles[PROffset + K] = + PRCycles[K] * SchedModel.getResourceFactor(K); + + return FBI; +} + +ArrayRef<unsigned> +MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const { + assert(BlockInfo[MBBNum].hasResources() && + "getResources() must be called before getProcResourceCycles()"); + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size()); + return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds, + PRKinds); +} + + +//===----------------------------------------------------------------------===// +// Ensemble utility functions +//===----------------------------------------------------------------------===// + +MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) + : MTM(*ct) { + BlockInfo.resize(MTM.BlockInfo.size()); + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds); + ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds); +} + +// Virtual destructor serves as an anchor. +MachineTraceMetrics::Ensemble::~Ensemble() {} + +const MachineLoop* +MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const { + return MTM.Loops->getLoopFor(MBB); +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace above MBB. +void MachineTraceMetrics::Ensemble:: +computeDepthResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; + + // Compute resources from trace above. The top block is simple. + if (!TBI->Pred) { + TBI->InstrDepth = 0; + TBI->Head = MBB->getNumber(); + std::fill(ProcResourceDepths.begin() + PROffset, + ProcResourceDepths.begin() + PROffset + PRKinds, 0); + return; + } + + // Compute from the block above. A post-order traversal ensures the + // predecessor is always computed first. + unsigned PredNum = TBI->Pred->getNumber(); + TraceBlockInfo *PredTBI = &BlockInfo[PredNum]; + assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet"); + const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred); + TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount; + TBI->Head = PredTBI->Head; + + // Compute per-resource depths. + ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum); + ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K]; +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace below MBB. 
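+// Roughly, the recurrence is InstrHeight(MBB) = InstrCount(MBB) +
+// InstrHeight(Succ) and, per resource kind K, Heights[MBB][K] =
+// Cycles[MBB][K] + Heights[Succ][K], with the trace tail as the base case.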
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+  unsigned PROffset = MBB->getNumber() * PRKinds;
+
+  // Compute resources for the current block.
+  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
+
+  // The trace tail is done.
+  if (!TBI->Succ) {
+    TBI->Tail = MBB->getNumber();
+    std::copy(PRCycles.begin(), PRCycles.end(),
+              ProcResourceHeights.begin() + PROffset);
+    return;
+  }
+
+  // Compute from the block below. A post-order traversal ensures the
+  // successor is always computed first.
+  unsigned SuccNum = TBI->Succ->getNumber();
+  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
+  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+  TBI->InstrHeight += SuccTBI->InstrHeight;
+  TBI->Tail = SuccTBI->Tail;
+
+  // Compute per-resource heights.
+  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
+  for (unsigned K = 0; K != PRKinds; ++K)
+    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+  return TBI->hasValidDepth() ? TBI : 0;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+  return TBI->hasValidHeight() ? TBI : 0;
+}
+
+/// Get an array of processor resource depths for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by all blocks preceding MBB in its trace. It does not include instructions
+/// in MBB.
+///
+/// Compare TraceBlockInfo::InstrDepth.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceDepths(unsigned MBBNum) const {
+  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
+  return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds,
+                            PRKinds);
+}
+
+/// Get an array of processor resource heights for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by this block and all blocks following it in its trace.
+///
+/// Compare TraceBlockInfo::InstrHeight.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceHeights(unsigned MBBNum) const {
+  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
+  return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds,
+                            PRKinds);
+}
+
+//===----------------------------------------------------------------------===//
+// Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is called on the post-ordered blocks.
+//
+// We never allow traces that leave loops, but we do allow traces to enter
+// nested loops. We also never allow traces to contain back-edges.
+//
+// This means that a loop header can never appear above the center block of a
+// trace, except as the trace head. Below the center block, loop exiting edges
+// are banned.
+//
+// Return true if an edge from the From loop to the To loop is leaving a loop.
+// Either of To and From can be null.
+static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
+  return From && !From->contains(To);
+}
+
+// MinInstrCountEnsemble - Pick the trace that executes the fewest
+// instructions.
+namespace {
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+  const char *getName() const { return "MinInstr"; }
+  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
+  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+
+public:
+  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+    : MachineTraceMetrics::Ensemble(mtm) {}
+};
+}
+
+// Select the preferred predecessor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+  if (MBB->pred_empty())
+    return 0;
+  const MachineLoop *CurLoop = getLoopFor(MBB);
+  // Don't leave loops, and never follow back-edges.
+  if (CurLoop && MBB == CurLoop->getHeader())
+    return 0;
+  unsigned CurCount = MTM.getResources(MBB)->InstrCount;
+  const MachineBasicBlock *Best = 0;
+  unsigned BestDepth = 0;
+  for (MachineBasicBlock::const_pred_iterator
+         I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+    const MachineBasicBlock *Pred = *I;
+    const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+      getDepthResources(Pred);
+    // Ignore cycles that aren't natural loops.
+    if (!PredTBI)
+      continue;
+    // Pick the predecessor that would give this block the smallest InstrDepth.
+    unsigned Depth = PredTBI->InstrDepth + CurCount;
+    if (!Best || Depth < BestDepth)
+      Best = Pred, BestDepth = Depth;
+  }
+  return Best;
+}
+
+// Select the preferred successor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+  if (MBB->succ_empty())
+    return 0;
+  const MachineLoop *CurLoop = getLoopFor(MBB);
+  const MachineBasicBlock *Best = 0;
+  unsigned BestHeight = 0;
+  for (MachineBasicBlock::const_succ_iterator
+         I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+    const MachineBasicBlock *Succ = *I;
+    // Don't consider back-edges.
+    if (CurLoop && Succ == CurLoop->getHeader())
+      continue;
+    // Don't consider successors exiting CurLoop.
+    if (isExitingLoop(CurLoop, getLoopFor(Succ)))
+      continue;
+    const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
+      getHeightResources(Succ);
+    // Ignore cycles that aren't natural loops.
+    if (!SuccTBI)
+      continue;
+    // Pick the successor that would give this block the smallest InstrHeight.
+    unsigned Height = SuccTBI->InstrHeight;
+    if (!Best || Height < BestHeight)
+      Best = Succ, BestHeight = Height;
+  }
+  return Best;
+}
+
+// Get an Ensemble sub-class for the requested trace strategy.
+MachineTraceMetrics::Ensemble *
+MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
+  assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
+  Ensemble *&E = Ensembles[strategy];
+  if (E)
+    return E;
+
+  // Allocate a new Ensemble on demand.
+ switch (strategy) { + case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this)); + default: llvm_unreachable("Invalid trace strategy enum"); + } +} + +void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n'); + BlockInfo[MBB->getNumber()].invalidate(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->invalidate(MBB); +} + +void MachineTraceMetrics::verifyAnalysis() const { + if (!MF) + return; +#ifndef NDEBUG + assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->verify(); +#endif +} + +//===----------------------------------------------------------------------===// +// Trace building +//===----------------------------------------------------------------------===// +// +// Traces are built by two CFG traversals. To avoid recomputing too much, use a +// set abstraction that confines the search to the current loop, and doesn't +// revisit blocks. + +namespace { +struct LoopBounds { + MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks; + SmallPtrSet<const MachineBasicBlock*, 8> Visited; + const MachineLoopInfo *Loops; + bool Downward; + LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks, + const MachineLoopInfo *loops) + : Blocks(blocks), Loops(loops), Downward(false) {} +}; +} + +// Specialize po_iterator_storage in order to prune the post-order traversal so +// it is limited to the current loop and doesn't traverse the loop back edges. +namespace llvm { +template<> +class po_iterator_storage<LoopBounds, true> { + LoopBounds &LB; +public: + po_iterator_storage(LoopBounds &lb) : LB(lb) {} + void finishPostorder(const MachineBasicBlock*) {} + + bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) { + // Skip already visited To blocks. + MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; + if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) + return false; + // From is null once when To is the trace center block. + if (From) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + // Don't follow backedges, don't leave FromLoop when going upwards. + if ((LB.Downward ? To : From) == FromLoop->getHeader()) + return false; + // Don't leave FromLoop. + if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) + return false; + } + } + // To is a new block. Mark the block as visited in case the CFG has cycles + // that MachineLoopInfo didn't recognize as a natural loop. + return LB.Visited.insert(To); + } +}; +} + +/// Compute the trace through MBB. +void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Computing " << getName() << " trace through BB#" + << MBB->getNumber() << '\n'); + // Set up loop bounds for the backwards post-order traversal. + LoopBounds Bounds(BlockInfo, MTM.Loops); + + // Run an upwards post-order search for the trace start. + Bounds.Downward = false; + Bounds.Visited.clear(); + typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO; + for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the predecessors have been visited, pick the preferred one. 
+ TBI.Pred = pickTracePred(*I); + DEBUG({ + if (TBI.Pred) + dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leading to I is now known, compute the depth resources. + computeDepthResources(*I); + } + + // Run a downwards post-order search for the trace end. + Bounds.Downward = true; + Bounds.Visited.clear(); + typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO; + for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the successors have been visited, pick the preferred one. + TBI.Succ = pickTraceSucc(*I); + DEBUG({ + if (TBI.Succ) + dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leaving I is now known, compute the height resources. + computeHeightResources(*I); + } +} + +/// Invalidate traces through BadMBB. +void +MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { + SmallVector<const MachineBasicBlock*, 16> WorkList; + TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()]; + + // Invalidate height resources of blocks above MBB. + if (BadTBI.hasValidHeight()) { + BadTBI.invalidateHeight(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " height.\n"); + // Find any MBB predecessors that have MBB as their preferred successor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidHeight()) + continue; + if (TBI.Succ == MBB) { + TBI.invalidateHeight(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Succ is actually a *I successor. + assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Invalidate depth resources of blocks below MBB. + if (BadTBI.hasValidDepth()) { + BadTBI.invalidateDepth(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " depth.\n"); + // Find any MBB successors that have MBB as their preferred predecessor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidDepth()) + continue; + if (TBI.Pred == MBB) { + TBI.invalidateDepth(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Pred is actually a *I predecessor. + assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Clear any per-instruction data. We only have to do this for BadMBB itself + // because the instructions in that block may change. Other blocks may be + // invalidated, but their instructions will stay the same, so there is no + // need to erase the Cycle entries. They will be overwritten when we + // recompute. 
+ for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end(); + I != E; ++I) + Cycles.erase(I); +} + +void MachineTraceMetrics::Ensemble::verify() const { +#ifndef NDEBUG + assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() && + "Outdated BlockInfo size"); + for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) { + const TraceBlockInfo &TBI = BlockInfo[Num]; + if (TBI.hasValidDepth() && TBI.Pred) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() && + "Trace is broken, depth should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge"); + } + if (TBI.hasValidHeight() && TBI.Succ) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() && + "Trace is broken, height should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + const MachineLoop *SuccLoop = getLoopFor(TBI.Succ); + assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) && + "Trace contains backedge"); + } + } +#endif +} + +//===----------------------------------------------------------------------===// +// Data Dependencies +//===----------------------------------------------------------------------===// +// +// Compute the depth and height of each instruction based on data dependencies +// and instruction latencies. These cycle numbers assume that the CPU can issue +// an infinite number of instructions per cycle as long as their dependencies +// are ready. + +// A data dependency is represented as a defining MI and operand numbers on the +// defining and using MI. +namespace { +struct DataDep { + const MachineInstr *DefMI; + unsigned DefOp; + unsigned UseOp; + + DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp) + : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {} + + /// Create a DataDep from an SSA form virtual register. + DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) + : UseOp(UseOp) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); + assert(!DefI.atEnd() && "Register has no defs"); + DefMI = &*DefI; + DefOp = DefI.getOperandNo(); + assert((++DefI).atEnd() && "Register has multiple defs"); + } +}; +} + +// Get the input data dependencies that must be ready before UseMI can issue. +// Return true if UseMI has any physreg operands. +static bool getDataDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineRegisterInfo *MRI) { + bool HasPhysRegs = false; + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + HasPhysRegs = true; + continue; + } + // Collect virtual register reads. + if (MO->readsReg()) + Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo())); + } + return HasPhysRegs; +} + +// Get the input data dependencies of a PHI instruction, using Pred as the +// preferred predecessor. +// This will add at most one dependency to Deps. 
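+// PHI operands come in (value, predecessor MBB) pairs starting at operand 1,
+// e.g. %v = PHI %a, <BB#0>, %b, <BB#1>; only the pair whose MBB matches Pred
+// contributes a dependency.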
+static void getPHIDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineBasicBlock *Pred, + const MachineRegisterInfo *MRI) { + // No predecessor at the beginning of a trace. Ignore dependencies. + if (!Pred) + return; + assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI"); + for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) { + if (UseMI->getOperand(i + 1).getMBB() == Pred) { + unsigned Reg = UseMI->getOperand(i).getReg(); + Deps.push_back(DataDep(MRI, Reg, i)); + return; + } + } +} + +// Keep track of physreg data dependencies by recording each live register unit. +// Associate each regunit with an instruction operand. Depending on the +// direction instructions are scanned, it could be the operand that defined the +// regunit, or the highest operand to read the regunit. +namespace { +struct LiveRegUnit { + unsigned RegUnit; + unsigned Cycle; + const MachineInstr *MI; + unsigned Op; + + unsigned getSparseSetIndex() const { return RegUnit; } + + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {} +}; +} + +// Identify physreg dependencies for UseMI, and update the live regunit +// tracking set when scanning instructions downwards. +static void updatePhysDepsDownwards(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + SparseSet<LiveRegUnit> &RegUnits, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> Kills; + SmallVector<unsigned, 8> LiveDefOps; + + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // Track live defs and kills for updating RegUnits. + if (MO->isDef()) { + if (MO->isDead()) + Kills.push_back(Reg); + else + LiveDefOps.push_back(MO.getOperandNo()); + } else if (MO->isKill()) + Kills.push_back(Reg); + // Identify dependencies. + if (!MO->readsReg()) + continue; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo())); + break; + } + } + + // Update RegUnits to reflect live registers after UseMI. + // First kills. + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + RegUnits.erase(*Units); + + // Second, live defs. + for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { + unsigned DefOp = LiveDefOps[i]; + for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); + Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + LRU.MI = UseMI; + LRU.Op = DefOp; + } + } +} + +/// The length of the critical path through a trace is the maximum of two path +/// lengths: +/// +/// 1. The maximum height+depth over all instructions in the trace center block. +/// +/// 2. The longest cross-block dependency chain. For small blocks, it is +/// possible that the critical path through the trace doesn't include any +/// instructions in the block. +/// +/// This function computes the second number from the live-in list of the +/// center block. 
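+/// Concretely, for each virtual register R in the live-in list with recorded
+/// height H, the candidate length is H + Depth(def of R), and the result is
+/// the maximum over all such live-ins.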
+unsigned MachineTraceMetrics::Ensemble:: +computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { + assert(TBI.HasValidInstrDepths && "Missing depth info"); + assert(TBI.HasValidInstrHeights && "Missing height info"); + unsigned MaxLen = 0; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + const LiveInReg &LIR = TBI.LiveIns[i]; + if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + continue; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + // Ignore dependencies outside the current trace. + const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; + if (!DefTBI.isUsefulDominator(TBI)) + continue; + unsigned Len = LIR.Height + Cycles[DefMI].Depth; + MaxLen = std::max(MaxLen, Len); + } + return MaxLen; +} + +/// Compute instruction depths for all instructions above or in MBB in its +/// trace. This assumes that the trace through MBB has already been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrDepths(const MachineBasicBlock *MBB) { + // The top of the trace may already be computed, and HasValidInstrDepths + // implies Head->HasValidInstrDepths, so we only need to start from the first + // block in the trace that needs to be recomputed. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidDepth() && "Incomplete trace"); + if (TBI.HasValidInstrDepths) + break; + Stack.push_back(MBB); + MBB = TBI.Pred; + } while (MBB); + + // FIXME: If MBB is non-null at this point, it is the last pre-computed block + // in the trace. We should track any live-out physregs that were defined in + // the trace. This is quite rare in SSA form, typically created by CSE + // hoisting a compare. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // Go through trace blocks in top-down order, stopping after the center block. + SmallVector<DataDep, 8> Deps; + while (!Stack.empty()) { + MBB = Stack.pop_back_val(); + DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrDepths = true; + TBI.CriticalPath = 0; + + // Print out resource depths here as well. + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrDepth); + ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + if (PRDepths[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRDepths[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + + // Also compute the critical path length through MBB when possible. + if (TBI.HasValidInstrHeights) + TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); + + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *UseMI = I; + + // Collect all data dependencies. + Deps.clear(); + if (UseMI->isPHI()) + getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI); + + // Filter and process dependencies, computing the earliest issue cycle. + unsigned Cycle = 0; + for (unsigned i = 0, e = Deps.size(); i != e; ++i) { + const DataDep &Dep = Deps[i]; + const TraceBlockInfo&DepTBI = + BlockInfo[Dep.DefMI->getParent()->getNumber()]; + // Ignore dependencies from outside the current trace. 
+ if (!DepTBI.isUsefulDominator(TBI)) + continue; + assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); + unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp, + /* FindMin = */ false); + Cycle = std::max(Cycle, DepCycle); + } + // Remember the instruction depth. + InstrCycles &MICycles = Cycles[UseMI]; + MICycles.Depth = Cycle; + + if (!TBI.HasValidInstrHeights) { + DEBUG(dbgs() << Cycle << '\t' << *UseMI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI); + } + } +} + +// Identify physreg dependencies for MI when scanning instructions upwards. +// Return the issue height of MI after considering any live regunits. +// Height is the issue height computed from virtual register dependencies alone. +static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, + SparseSet<LiveRegUnit> &RegUnits, + const TargetSchedModel &SchedModel, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> ReadOps; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MO->readsReg()) + ReadOps.push_back(MO.getOperandNo()); + if (!MO->isDef()) + continue; + // This is a def of Reg. Remove corresponding entries from RegUnits, and + // update MI Height to consider the physreg dependencies. + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + unsigned DepHeight = I->Cycle; + if (!MI->isTransient()) { + // We may not know the UseMI of this dependency, if it came from the + // live-in list. SchedModel can handle a NULL UseMI. + DepHeight += SchedModel + .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op, + /* FindMin = */ false); + } + Height = std::max(Height, DepHeight); + // This regunit is dead above MI. + RegUnits.erase(I); + } + } + + // Now we know the height of MI. Update any regunits read. + for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { + unsigned Reg = MI->getOperand(ReadOps[i]).getReg(); + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + // Set the height to the highest reader of the unit. + if (LRU.Cycle <= Height && LRU.MI != MI) { + LRU.Cycle = Height; + LRU.MI = MI; + LRU.Op = ReadOps[i]; + } + } + } + + return Height; +} + + +typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap; + +// Push the height of DefMI upwards if required to match UseMI. +// Return true if this is the first time DefMI was seen. +static bool pushDepHeight(const DataDep &Dep, + const MachineInstr *UseMI, unsigned UseHeight, + MIHeightMap &Heights, + const TargetSchedModel &SchedModel, + const TargetInstrInfo *TII) { + // Adjust height by Dep.DefMI latency. + if (!Dep.DefMI->isTransient()) + UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp, false); + + // Update Heights[DefMI] to be the maximum height seen. 
+ MIHeightMap::iterator I; + bool New; + tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); + if (New) + return true; + + // DefMI has been pushed before. Give it the max height. + if (I->second < UseHeight) + I->second = UseHeight; + return false; +} + +/// Assuming that the virtual register defined by DefMI:DefOp was used by +/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop +/// when reaching the block that contains DefMI. +void MachineTraceMetrics::Ensemble:: +addLiveIns(const MachineInstr *DefMI, unsigned DefOp, + ArrayRef<const MachineBasicBlock*> Trace) { + assert(!Trace.empty() && "Trace should contain at least one block"); + unsigned Reg = DefMI->getOperand(DefOp).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + const MachineBasicBlock *DefMBB = DefMI->getParent(); + + // Reg is live-in to all blocks in Trace that follow DefMBB. + for (unsigned i = Trace.size(); i; --i) { + const MachineBasicBlock *MBB = Trace[i-1]; + if (MBB == DefMBB) + return; + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + // Just add the register. The height will be updated later. + TBI.LiveIns.push_back(Reg); + } +} + +/// Compute instruction heights in the trace through MBB. This updates MBB and +/// the blocks below it in the trace. It is assumed that the trace has already +/// been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrHeights(const MachineBasicBlock *MBB) { + // The bottom of the trace may already be computed. + // Find the blocks that need updating. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidHeight() && "Incomplete trace"); + if (TBI.HasValidInstrHeights) + break; + Stack.push_back(MBB); + TBI.LiveIns.clear(); + MBB = TBI.Succ; + } while (MBB); + + // As we move upwards in the trace, keep track of instructions that are + // required by deeper trace instructions. Map MI -> height required so far. + MIHeightMap Heights; + + // For physregs, the def isn't known when we see the use. + // Instead, keep track of the highest use of each regunit. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // If the bottom of the trace was already precomputed, initialize heights + // from its live-in list. + // MBB is the highest precomputed block in the trace. + if (MBB) { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg LI = TBI.LiveIns[i]; + if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { + // For virtual registers, the def latency is included. + unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; + if (Height < LI.Height) + Height = LI.Height; + } else { + // For register units, the def latency is not included because we don't + // know the def yet. + RegUnits[LI.Reg].Cycle = LI.Height; + } + } + } + + // Go through the trace blocks in bottom-up order. 
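// The per-block walk below visits instructions backwards using a plain
// bidirectional iterator rather than a reverse_iterator: decrement first,
// then dereference, so the loop is well-defined at both ends. The same
// shape over a toy container (illustrative, not LLVM code):
//
//   #include <cstdio>
//   #include <list>
//
//   void walkBackwards(std::list<int> &L) {
//     for (std::list<int>::iterator BI = L.end(), BB = L.begin();
//          BI != BB;) {
//       int &Elt = *--BI;        // decrement, then use
//       std::printf("%d\n", Elt);
//     }
//   }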
+ SmallVector<DataDep, 8> Deps; + for (;!Stack.empty(); Stack.pop_back()) { + MBB = Stack.back(); + DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrHeights = true; + TBI.CriticalPath = 0; + + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrHeight); + ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber()); + for (unsigned K = 0; K != PRHeights.size(); ++K) + if (PRHeights[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRHeights[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + + // Get dependencies from PHIs in the trace successor. + const MachineBasicBlock *Succ = TBI.Succ; + // If MBB is the last block in the trace, and it has a back-edge to the + // loop header, get loop-carried dependencies from PHIs in the header. For + // that purpose, pretend that all the loop header PHIs have height 0. + if (!Succ) + if (const MachineLoop *Loop = getLoopFor(MBB)) + if (MBB->isSuccessor(Loop->getHeader())) + Succ = Loop->getHeader(); + + if (Succ) { + for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end(); + I != E && I->isPHI(); ++I) { + const MachineInstr *PHI = I; + Deps.clear(); + getPHIDeps(PHI, Deps, MBB, MTM.MRI); + if (!Deps.empty()) { + // Loop header PHI heights are all 0. + unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); + if (pushDepHeight(Deps.front(), PHI, Height, + Heights, MTM.SchedModel, MTM.TII)) + addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); + } + } + } + + // Go through the block backwards. + for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin(); + BI != BB;) { + const MachineInstr *MI = --BI; + + // Find the MI height as determined by virtual register uses in the + // trace below. + unsigned Cycle = 0; + MIHeightMap::iterator HeightI = Heights.find(MI); + if (HeightI != Heights.end()) { + Cycle = HeightI->second; + // We won't be seeing any more MI uses. + Heights.erase(HeightI); + } + + // Don't process PHI deps. They depend on the specific predecessor, and + // we'll get them when visiting the predecessor. + Deps.clear(); + bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI); + + // There may also be regunit dependencies to include in the height. + if (HasPhysRegs) + Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, + MTM.SchedModel, MTM.TII, MTM.TRI); + + // Update the required height of any virtual registers read by MI. + for (unsigned i = 0, e = Deps.size(); i != e; ++i) + if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII)) + addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack); + + InstrCycles &MICycles = Cycles[MI]; + MICycles.Height = Cycle; + if (!TBI.HasValidInstrDepths) { + DEBUG(dbgs() << Cycle << '\t' << *MI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI); + } + + // Update virtual live-in heights. They were added by addLiveIns() with a 0 + // height because the final height isn't known until now. 
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:"); + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg &LIR = TBI.LiveIns[i]; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + LIR.Height = Heights.lookup(DefMI); + DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height); + } + + // Transfer the live regunits to the live-in list. + for (SparseSet<LiveRegUnit>::const_iterator + RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) { + TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle)); + DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI) + << '@' << RI->Cycle); + } + DEBUG(dbgs() << '\n'); + + if (!TBI.HasValidInstrDepths) + continue; + // Add live-ins to the critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, + computeCrossBlockCriticalPath(TBI)); + DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); + } +} + +MachineTraceMetrics::Trace +MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { + // FIXME: Check cache tags, recompute as needed. + computeTrace(MBB); + computeInstrDepths(MBB); + computeInstrHeights(MBB); + return Trace(*this, BlockInfo[MBB->getNumber()]); +} + +unsigned +MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const { + assert(MI && "Not an instruction."); + assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) && + "MI must be in the trace center block"); + InstrCycles Cyc = getInstrCycles(MI); + return getCriticalPath() - (Cyc.Depth + Cyc.Height); +} + +unsigned +MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { + const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum()); + SmallVector<DataDep, 1> Deps; + getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI); + assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor"); + DataDep &Dep = Deps.front(); + unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += TE.MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false); + return DepCycle; +} + +unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { + // Find the limiting processor resource. + // Numbers have been pre-scaled to be comparable. + unsigned PRMax = 0; + ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); + if (Bottom) { + ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]); + } else { + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K]); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + + unsigned Instrs = TBI.InstrDepth; + if (Bottom) + Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; + // Assume issue width 1 without a schedule model. + return std::max(Instrs, PRMax); +} + +unsigned MachineTraceMetrics::Trace:: +getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { + // Add up resources above and below the center block. 
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); + ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum()); + unsigned PRMax = 0; + for (unsigned K = 0; K != PRDepths.size(); ++K) { + unsigned PRCycles = PRDepths[K] + PRHeights[K]; + for (unsigned I = 0; I != Extrablocks.size(); ++I) + PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + PRMax = std::max(PRMax, PRCycles); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) + Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; + // Assume issue width 1 without a schedule model. + return std::max(Instrs, PRMax); +} + +void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { + OS << getName() << " ensemble:\n"; + for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { + OS << " BB#" << i << '\t'; + BlockInfo[i].print(OS); + OS << '\n'; + } +} + +void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { + if (hasValidDepth()) { + OS << "depth=" << InstrDepth; + if (Pred) + OS << " pred=BB#" << Pred->getNumber(); + else + OS << " pred=null"; + OS << " head=BB#" << Head; + if (HasValidInstrDepths) + OS << " +instrs"; + } else + OS << "depth invalid"; + OS << ", "; + if (hasValidHeight()) { + OS << "height=" << InstrHeight; + if (Succ) + OS << " succ=BB#" << Succ->getNumber(); + else + OS << " succ=null"; + OS << " tail=BB#" << Tail; + if (HasValidInstrHeights) + OS << " +instrs"; + } else + OS << "height invalid"; + if (HasValidInstrDepths && HasValidInstrHeights) + OS << ", crit=" << CriticalPath; +} + +void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { + unsigned MBBNum = &TBI - &TE.BlockInfo[0]; + + OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum + << " --> BB#" << TBI.Tail << ':'; + if (TBI.hasValidHeight() && TBI.hasValidDepth()) + OS << ' ' << getInstrCount() << " instrs."; + if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) + OS << ' ' << TBI.CriticalPath << " cycles."; + + const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; + OS << "\nBB#" << MBBNum; + while (Block->hasValidDepth() && Block->Pred) { + unsigned Num = Block->Pred->getNumber(); + OS << " <- BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + + Block = &TBI; + OS << "\n "; + while (Block->hasValidHeight() && Block->Succ) { + unsigned Num = Block->Succ->getNumber(); + OS << " -> BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + OS << '\n'; +} diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp new file mode 100644 index 0000000..4b12300 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -0,0 +1,1605 @@ +//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass to verify generated machine code. The following is checked: +// +// Operand counts: All explicit operands must be present. +// +// Register classes: All physical and virtual register operands must be +// compatible with the register class required by the instruction descriptor. 
+// +// Register live intervals: Registers must be defined only once, and must be +// defined before use. +// +// The machine code verifier is enabled from LLVMTargetMachine.cpp with the +// command-line option -verify-machineinstrs, or by defining the environment +// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive +// the verifier errors. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +namespace { + struct MachineVerifier { + + MachineVerifier(Pass *pass, const char *b) : + PASS(pass), + Banner(b), + OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) + {} + + bool runOnMachineFunction(MachineFunction &MF); + + Pass *const PASS; + const char *Banner; + const char *const OutFileName; + raw_ostream *OS; + const MachineFunction *MF; + const TargetMachine *TM; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + + unsigned foundErrors; + + typedef SmallVector<unsigned, 16> RegVector; + typedef SmallVector<const uint32_t*, 4> RegMaskVector; + typedef DenseSet<unsigned> RegSet; + typedef DenseMap<unsigned, const MachineInstr*> RegMap; + typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet; + + const MachineInstr *FirstTerminator; + BlockSet FunctionBlocks; + + BitVector regsReserved; + RegSet regsLive; + RegVector regsDefined, regsDead, regsKilled; + RegMaskVector regMasks; + RegSet regsLiveInButUnused; + + SlotIndex lastIndex; + + // Add Reg and any sub-registers to RV + void addRegWithSubRegs(RegVector &RV, unsigned Reg) { + RV.push_back(Reg); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + RV.push_back(*SubRegs); + } + + struct BBInfo { + // Is this MBB reachable from the MF entry point? + bool reachable; + + // Vregs that must be live in because they are used without being + // defined. Map value is the user. + RegMap vregsLiveIn; + + // Regs killed in MBB. They may be defined again, and will then be in both + // regsKilled and regsLiveOut. + RegSet regsKilled; + + // Regs defined in MBB and live out. Note that vregs passing through may + // be live out without being mentioned here. + RegSet regsLiveOut; + + // Vregs that pass through MBB untouched. This set is disjoint from + // regsKilled and regsLiveOut. + RegSet vregsPassed; + + // Vregs that must pass through MBB because they are needed by a successor + // block. This set is disjoint from regsLiveOut. + RegSet vregsRequired; + + // Set versions of block's predecessor and successor lists. 
+ BlockSet Preds, Succs; + + BBInfo() : reachable(false) {} + + // Add register to vregsPassed if it belongs there. Return true if + // anything changed. + bool addPassed(unsigned Reg) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (regsKilled.count(Reg) || regsLiveOut.count(Reg)) + return false; + return vregsPassed.insert(Reg).second; + } + + // Same for a full set. + bool addPassed(const RegSet &RS) { + bool changed = false; + for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) + if (addPassed(*I)) + changed = true; + return changed; + } + + // Add register to vregsRequired if it belongs there. Return true if + // anything changed. + bool addRequired(unsigned Reg) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (regsLiveOut.count(Reg)) + return false; + return vregsRequired.insert(Reg).second; + } + + // Same for a full set. + bool addRequired(const RegSet &RS) { + bool changed = false; + for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) + if (addRequired(*I)) + changed = true; + return changed; + } + + // Same for a full map. + bool addRequired(const RegMap &RM) { + bool changed = false; + for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I) + if (addRequired(I->first)) + changed = true; + return changed; + } + + // Live-out registers are either in regsLiveOut or vregsPassed. + bool isLiveOut(unsigned Reg) const { + return regsLiveOut.count(Reg) || vregsPassed.count(Reg); + } + }; + + // Extra register info per MBB. + DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap; + + bool isReserved(unsigned Reg) { + return Reg < regsReserved.size() && regsReserved.test(Reg); + } + + bool isAllocatable(unsigned Reg) { + return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg); + } + + // Analysis information if available + LiveVariables *LiveVars; + LiveIntervals *LiveInts; + LiveStacks *LiveStks; + SlotIndexes *Indexes; + + void visitMachineFunctionBefore(); + void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); + void visitMachineBundleBefore(const MachineInstr *MI); + void visitMachineInstrBefore(const MachineInstr *MI); + void visitMachineOperand(const MachineOperand *MO, unsigned MONum); + void visitMachineInstrAfter(const MachineInstr *MI); + void visitMachineBundleAfter(const MachineInstr *MI); + void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); + void visitMachineFunctionAfter(); + + void report(const char *msg, const MachineFunction *MF); + void report(const char *msg, const MachineBasicBlock *MBB); + void report(const char *msg, const MachineInstr *MI); + void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI); + void report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI); + + void verifyInlineAsm(const MachineInstr *MI); + + void checkLiveness(const MachineOperand *MO, unsigned MONum); + void markReachable(const MachineBasicBlock *MBB); + void calcRegsPassed(); + void checkPHIOps(const MachineBasicBlock *MBB); + + void calcRegsRequired(); + void verifyLiveVariables(); + void verifyLiveIntervals(); + void verifyLiveInterval(const LiveInterval&); + void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); + void verifyLiveIntervalSegment(const LiveInterval&, + LiveInterval::const_iterator); + }; + + struct MachineVerifierPass : public MachineFunctionPass { + static char ID; // Pass ID, replacement for typeid + const char 
*const Banner; + + MachineVerifierPass(const char *b = 0) + : MachineFunctionPass(ID), Banner(b) { + initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) { + MF.verify(this, Banner); + return false; + } + }; + +} + +char MachineVerifierPass::ID = 0; +INITIALIZE_PASS(MachineVerifierPass, "machineverifier", + "Verify generated machine code", false, false) + +FunctionPass *llvm::createMachineVerifierPass(const char *Banner) { + return new MachineVerifierPass(Banner); +} + +void MachineFunction::verify(Pass *p, const char *Banner) const { + MachineVerifier(p, Banner) + .runOnMachineFunction(const_cast<MachineFunction&>(*this)); +} + +bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { + raw_ostream *OutFile = 0; + if (OutFileName) { + std::string ErrorInfo; + OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, + raw_fd_ostream::F_Append); + if (!ErrorInfo.empty()) { + errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; + exit(1); + } + + OS = OutFile; + } else { + OS = &errs(); + } + + foundErrors = 0; + + this->MF = &MF; + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + TRI = TM->getRegisterInfo(); + MRI = &MF.getRegInfo(); + + LiveVars = NULL; + LiveInts = NULL; + LiveStks = NULL; + Indexes = NULL; + if (PASS) { + LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>(); + // We don't want to verify LiveVariables if LiveIntervals is available. + if (!LiveInts) + LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>(); + LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>(); + Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>(); + } + + visitMachineFunctionBefore(); + for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); + MFI!=MFE; ++MFI) { + visitMachineBasicBlockBefore(MFI); + // Keep track of the current bundle header. + const MachineInstr *CurBundle = 0; + // Do we expect the next instruction to be part of the same bundle? + bool InBundle = false; + + for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), + MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { + if (MBBI->getParent() != MFI) { + report("Bad instruction parent pointer", MFI); + *OS << "Instruction: " << *MBBI; + continue; + } + + // Check for consistent bundle flags. + if (InBundle && !MBBI->isBundledWithPred()) + report("Missing BundledPred flag, " + "BundledSucc was set on predecessor", MBBI); + if (!InBundle && MBBI->isBundledWithPred()) + report("BundledPred flag is set, " + "but BundledSucc not set on predecessor", MBBI); + + // Is this a bundle header? + if (!MBBI->isInsideBundle()) { + if (CurBundle) + visitMachineBundleAfter(CurBundle); + CurBundle = MBBI; + visitMachineBundleBefore(CurBundle); + } else if (!CurBundle) + report("No bundle header", MBBI); + visitMachineInstrBefore(MBBI); + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) + visitMachineOperand(&MBBI->getOperand(I), I); + visitMachineInstrAfter(MBBI); + + // Was this the last bundled instruction? 
+ InBundle = MBBI->isBundledWithSucc(); + } + if (CurBundle) + visitMachineBundleAfter(CurBundle); + if (InBundle) + report("BundledSucc flag set on last instruction in block", &MFI->back()); + visitMachineBasicBlockAfter(MFI); + } + visitMachineFunctionAfter(); + + if (OutFile) + delete OutFile; + else if (foundErrors) + report_fatal_error("Found "+Twine(foundErrors)+" machine code errors."); + + // Clean up. + regsLive.clear(); + regsDefined.clear(); + regsDead.clear(); + regsKilled.clear(); + regMasks.clear(); + regsLiveInButUnused.clear(); + MBBInfoMap.clear(); + + return false; // no changes +} + +void MachineVerifier::report(const char *msg, const MachineFunction *MF) { + assert(MF); + *OS << '\n'; + if (!foundErrors++) { + if (Banner) + *OS << "# " << Banner << '\n'; + MF->print(*OS, Indexes); + } + *OS << "*** Bad machine code: " << msg << " ***\n" + << "- function: " << MF->getName() << "\n"; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { + assert(MBB); + report(msg, MBB->getParent()); + *OS << "- basic block: BB#" << MBB->getNumber() + << ' ' << MBB->getName() + << " (" << (const void*)MBB << ')'; + if (Indexes) + *OS << " [" << Indexes->getMBBStartIdx(MBB) + << ';' << Indexes->getMBBEndIdx(MBB) << ')'; + *OS << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineInstr *MI) { + assert(MI); + report(msg, MI->getParent()); + *OS << "- instruction: "; + if (Indexes && Indexes->hasIndex(MI)) + *OS << Indexes->getInstructionIndex(MI) << '\t'; + MI->print(*OS, TM); +} + +void MachineVerifier::report(const char *msg, + const MachineOperand *MO, unsigned MONum) { + assert(MO); + report(msg, MO->getParent()); + *OS << "- operand " << MONum << ": "; + MO->print(*OS, TM); + *OS << "\n"; +} + +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI) { + report(msg, MF); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI) { + report(msg, MBB); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + +void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { + BBInfo &MInfo = MBBInfoMap[MBB]; + if (!MInfo.reachable) { + MInfo.reachable = true; + for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), + SuE = MBB->succ_end(); SuI != SuE; ++SuI) + markReachable(*SuI); + } +} + +void MachineVerifier::visitMachineFunctionBefore() { + lastIndex = SlotIndex(); + regsReserved = MRI->getReservedRegs(); + + // A sub-register of a reserved register is also reserved + for (int Reg = regsReserved.find_first(); Reg>=0; + Reg = regsReserved.find_next(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + // FIXME: This should probably be: + // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register"); + regsReserved.set(*SubRegs); + } + } + + markReachable(&MF->front()); + + // Build a set of the basic blocks in the function. 
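// The duplicate check below relies on a set collapsing repeated entries:
// if the set built from a list is smaller than the list, the list held a
// duplicate. Minimal stand-alone form (toy element type):
//
//   #include <set>
//   #include <vector>
//
//   bool hasDuplicates(const std::vector<int> &List) {
//     std::set<int> S(List.begin(), List.end());
//     return S.size() != List.size();
//   }
//
//   // hasDuplicates({1, 2, 2}) == true; hasDuplicates({1, 2, 3}) == false.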
+  FunctionBlocks.clear();
+  for (MachineFunction::const_iterator
+       I = MF->begin(), E = MF->end(); I != E; ++I) {
+    FunctionBlocks.insert(I);
+    BBInfo &MInfo = MBBInfoMap[I];
+
+    MInfo.Preds.insert(I->pred_begin(), I->pred_end());
+    if (MInfo.Preds.size() != I->pred_size())
+      report("MBB has duplicate entries in its predecessor list.", I);
+
+    MInfo.Succs.insert(I->succ_begin(), I->succ_end());
+    if (MInfo.Succs.size() != I->succ_size())
+      report("MBB has duplicate entries in its successor list.", I);
+  }
+}
+
+// Does iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+                      const MachineBasicBlock *a, const MachineBasicBlock *b) {
+  if (*i == a)
+    return *++i == b;
+  if (*i == b)
+    return *++i == a;
+  return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+  FirstTerminator = 0;
+
+  if (MRI->isSSA()) {
+    // If this block has allocatable physical registers live-in, check that
+    // it is an entry block or landing pad.
+    for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
+         LE = MBB->livein_end();
+         LI != LE; ++LI) {
+      unsigned reg = *LI;
+      if (isAllocatable(reg) && !MBB->isLandingPad() &&
+          MBB != MBB->getParent()->begin()) {
+        report("MBB has allocatable live-in, but isn't entry or landing-pad.",
+               MBB);
+      }
+    }
+  }
+
+  // Count the number of landing pad successors.
+  SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+  for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I) {
+    if ((*I)->isLandingPad())
+      LandingPadSuccs.insert(*I);
+    if (!FunctionBlocks.count(*I))
+      report("MBB has successor that isn't part of the function.", MBB);
+    if (!MBBInfoMap[*I].Preds.count(MBB)) {
+      report("Inconsistent CFG", MBB);
+      *OS << "MBB is not in the predecessor list of the successor BB#"
+          << (*I)->getNumber() << ".\n";
+    }
+  }
+
+  // Check the predecessor list.
+  for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+       E = MBB->pred_end(); I != E; ++I) {
+    if (!FunctionBlocks.count(*I))
+      report("MBB has predecessor that isn't part of the function.", MBB);
+    if (!MBBInfoMap[*I].Succs.count(MBB)) {
+      report("Inconsistent CFG", MBB);
+      *OS << "MBB is not in the successor list of the predecessor BB#"
+          << (*I)->getNumber() << ".\n";
+    }
+  }
+
+  const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+  const BasicBlock *BB = MBB->getBasicBlock();
+  if (LandingPadSuccs.size() > 1 &&
+      !(AsmInfo &&
+        AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+        BB && isa<SwitchInst>(BB->getTerminator())))
+    report("MBB has more than one landing pad successor", MBB);
+
+  // Call AnalyzeBranch. If it succeeds, there are several more conditions
+  // to check.
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+                          TBB, FBB, Cond)) {
+    // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+    // check whether its answers match up with reality.
+    if (!TBB && !FBB) {
+      // Block falls through to its successor.
+      MachineFunction::const_iterator MBBI = MBB;
+      ++MBBI;
+      if (MBBI == MF->end()) {
+        // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+        // out the bottom of the function.
+      } else if (MBB->succ_size() == LandingPadSuccs.size()) {
+        // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+        // out of the block.
+      } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+        report("MBB exits via unconditional fall-through but doesn't have "
+               "exactly one CFG successor!", MBB);
+      } else if (!MBB->isSuccessor(MBBI)) {
+        report("MBB exits via unconditional fall-through but its successor "
+               "differs from its CFG successor!", MBB);
+      }
+      if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() &&
+          !TII->isPredicated(getBundleStart(&MBB->back()))) {
+        report("MBB exits via unconditional fall-through but ends with a "
+               "barrier instruction!", MBB);
+      }
+      if (!Cond.empty()) {
+        report("MBB exits via unconditional fall-through but has a condition!",
+               MBB);
+      }
+    } else if (TBB && !FBB && Cond.empty()) {
+      // Block unconditionally branches somewhere.
+      if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+        report("MBB exits via unconditional branch but doesn't have "
+               "exactly one CFG successor!", MBB);
+      } else if (!MBB->isSuccessor(TBB)) {
+        report("MBB exits via unconditional branch but the CFG "
+               "successor doesn't match the actual successor!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via unconditional branch but doesn't contain "
+               "any instructions!", MBB);
+      } else if (!getBundleStart(&MBB->back())->isBarrier()) {
+        report("MBB exits via unconditional branch but doesn't end with a "
+               "barrier instruction!", MBB);
+      } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+        report("MBB exits via unconditional branch but the branch isn't a "
+               "terminator instruction!", MBB);
+      }
+    } else if (TBB && !FBB && !Cond.empty()) {
+      // Block conditionally branches somewhere, otherwise falls through.
+      MachineFunction::const_iterator MBBI = MBB;
+      ++MBBI;
+      if (MBBI == MF->end()) {
+        report("MBB conditionally falls through out of function!", MBB);
+      } else if (MBB->succ_size() == 1) {
+        // A conditional branch with only one successor is weird, but allowed.
+        if (&*MBBI != TBB)
+          report("MBB exits via conditional branch/fall-through but only has "
+                 "one CFG successor!", MBB);
+        else if (TBB != *MBB->succ_begin())
+          report("MBB exits via conditional branch/fall-through but the CFG "
+                 "successor doesn't match the actual successor!", MBB);
+      } else if (MBB->succ_size() != 2) {
+        report("MBB exits via conditional branch/fall-through but doesn't have "
+               "exactly two CFG successors!", MBB);
+      } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+        report("MBB exits via conditional branch/fall-through but the CFG "
+               "successors don't match the actual successors!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via conditional branch/fall-through but doesn't "
+               "contain any instructions!", MBB);
+      } else if (getBundleStart(&MBB->back())->isBarrier()) {
+        report("MBB exits via conditional branch/fall-through but ends with a "
+               "barrier instruction!", MBB);
+      } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+        report("MBB exits via conditional branch/fall-through but the branch "
+               "isn't a terminator instruction!", MBB);
+      }
+    } else if (TBB && FBB) {
+      // Block conditionally branches somewhere, otherwise branches
+      // somewhere else.
+      if (MBB->succ_size() == 1) {
+        // A conditional branch with only one successor is weird, but allowed.
+        if (FBB != TBB)
+          report("MBB exits via conditional branch/branch but only has "
+                 "one CFG successor!", MBB);
+        else if (TBB != *MBB->succ_begin())
+          report("MBB exits via conditional branch/branch but the CFG "
+                 "successor doesn't match the actual successor!", MBB);
+      } else if (MBB->succ_size() != 2) {
+        report("MBB exits via conditional branch/branch but doesn't have "
+               "exactly two CFG successors!", MBB);
+      } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+        report("MBB exits via conditional branch/branch but the CFG "
+               "successors don't match the actual successors!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via conditional branch/branch but doesn't "
+               "contain any instructions!", MBB);
+      } else if (!getBundleStart(&MBB->back())->isBarrier()) {
+        report("MBB exits via conditional branch/branch but doesn't end with a "
+               "barrier instruction!", MBB);
+      } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+        report("MBB exits via conditional branch/branch but the branch "
+               "isn't a terminator instruction!", MBB);
+      }
+      if (Cond.empty()) {
+        report("MBB exits via conditional branch/branch but there's no "
+               "condition!", MBB);
+      }
+    } else {
+      report("AnalyzeBranch returned invalid data!", MBB);
+    }
+  }
+
+  regsLive.clear();
+  for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+       E = MBB->livein_end(); I != E; ++I) {
+    if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+      report("MBB live-in list contains non-physical register", MBB);
+      continue;
+    }
+    regsLive.insert(*I);
+    for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs)
+      regsLive.insert(*SubRegs);
+  }
+  regsLiveInButUnused = regsLive;
+
+  const MachineFrameInfo *MFI = MF->getFrameInfo();
+  assert(MFI && "Function has no frame info");
+  BitVector PR = MFI->getPristineRegs(MBB);
+  for (int I = PR.find_first(); I > 0; I = PR.find_next(I)) {
+    regsLive.insert(I);
+    for (MCSubRegIterator SubRegs(I, TRI); SubRegs.isValid(); ++SubRegs)
+      regsLive.insert(*SubRegs);
+  }
+
+  regsKilled.clear();
+  regsDefined.clear();
+
+  if (Indexes)
+    lastIndex = Indexes->getMBBStartIdx(MBB);
+}
+
+// This function gets called for all bundle headers, including normal
+// stand-alone unbundled instructions.
+void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
+  if (Indexes && Indexes->hasIndex(MI)) {
+    SlotIndex idx = Indexes->getInstructionIndex(MI);
+    if (!(idx > lastIndex)) {
+      report("Instruction index out of order", MI);
+      *OS << "Last instruction was at " << lastIndex << '\n';
+    }
+    lastIndex = idx;
+  }
+
+  // Ensure non-terminators don't follow terminators.
+  // Ignore predicated terminators formed by if conversion.
+  // FIXME: If conversion shouldn't need to violate this rule.
+  if (MI->isTerminator() && !TII->isPredicated(MI)) {
+    if (!FirstTerminator)
+      FirstTerminator = MI;
+  } else if (FirstTerminator) {
+    report("Non-terminator instruction after the first terminator", MI);
+    *OS << "First terminator was:\t" << *FirstTerminator;
+  }
+}
+
+// The operands on an INLINEASM instruction must follow a template.
+// Verify that the flag operands make sense.
+void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
+  // The first two operands on INLINEASM are the asm string and global flags.
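// The operand-group walk further below advances by however many operands
// each group's flag word announces for itself. A toy version over plain
// integers (this encoding is invented for illustration: a "flag" here is
// simply the number of registers that follow it, echoing
// 1 + InlineAsm::getNumOperandRegisters(...) in the real code):
//
//   #include <cstdio>
//   #include <vector>
//
//   void walkGroups(const std::vector<unsigned> &Ops, unsigned First) {
//     for (unsigned OpNo = First, NumOps; OpNo < Ops.size(); OpNo += NumOps) {
//       NumOps = 1 + Ops[OpNo];            // header plus its registers
//       std::printf("group at %u, size %u\n", OpNo, NumOps);
//     }
//   }
//
// A malformed last group makes OpNo overshoot the operand list, which is
// exactly the condition the "Missing operands in last group" check catches.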
+ if (MI->getNumOperands() < 2) { + report("Too few operands on inline asm", MI); + return; + } + if (!MI->getOperand(0).isSymbol()) + report("Asm string must be an external symbol", MI); + if (!MI->getOperand(1).isImm()) + report("Asm flags must be an immediate", MI); + // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2, + // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16. + if (!isUInt<5>(MI->getOperand(1).getImm())) + report("Unknown asm flags", &MI->getOperand(1), 1); + + assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed"); + + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + unsigned NumOps; + for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) { + const MachineOperand &MO = MI->getOperand(OpNo); + // There may be implicit ops after the fixed operands. + if (!MO.isImm()) + break; + NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm()); + } + + if (OpNo > MI->getNumOperands()) + report("Missing operands in last group", MI); + + // An optional MDNode follows the groups. + if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata()) + ++OpNo; + + // All trailing operands must be implicit registers. + for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (!MO.isReg() || !MO.isImplicit()) + report("Expected implicit register after groups", &MO, OpNo); + } +} + +void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MI->getNumOperands() < MCID.getNumOperands()) { + report("Too few operands", MI); + *OS << MCID.getNumOperands() << " operands expected, but " + << MI->getNumExplicitOperands() << " given.\n"; + } + + // Check the tied operands. + if (MI->isInlineAsm()) + verifyInlineAsm(MI); + + // Check the MachineMemOperands for basic consistency. + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) { + if ((*I)->isLoad() && !MI->mayLoad()) + report("Missing mayLoad flag", MI); + if ((*I)->isStore() && !MI->mayStore()) + report("Missing mayStore flag", MI); + } + + // Debug values must not have a slot index. + // Other instructions must have one, unless they are inside a bundle. + if (LiveInts) { + bool mapped = !LiveInts->isNotInMIMap(MI); + if (MI->isDebugValue()) { + if (mapped) + report("Debug instruction has a slot index", MI); + } else if (MI->isInsideBundle()) { + if (mapped) + report("Instruction inside bundle has a slot index", MI); + } else { + if (!mapped) + report("Missing slot index", MI); + } + } + + StringRef ErrorInfo; + if (!TII->verifyInstruction(MI, ErrorInfo)) + report(ErrorInfo.data(), MI); +} + +void +MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { + const MachineInstr *MI = MO->getParent(); + const MCInstrDesc &MCID = MI->getDesc(); + + // The first MCID.NumDefs operands must be explicit register defines + if (MONum < MCID.getNumDefs()) { + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; + if (!MO->isReg()) + report("Explicit definition must be a register", MO, MONum); + else if (!MO->isDef() && !MCOI.isOptionalDef()) + report("Explicit definition marked as use", MO, MONum); + else if (MO->isImplicit()) + report("Explicit definition marked as implicit", MO, MONum); + } else if (MONum < MCID.getNumOperands()) { + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; + // Don't check if it's the last operand in a variadic instruction. See, + // e.g., LDM_RET in the arm back end. 
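// The tie checks below boil down to: the tied-operand relation must be a
// symmetric pairing. With ties stored as an index map (an illustrative
// representation, not how MachineInstr stores them), the verifier's
// "Inconsistent tie links" condition is that the map is an involution:
//
//   #include <vector>
//
//   // TiedTo[i] == i means "not tied"; otherwise it names the partner.
//   // Assumes every entry is a valid index into the vector.
//   bool tiesConsistent(const std::vector<unsigned> &TiedTo) {
//     for (unsigned i = 0; i != TiedTo.size(); ++i)
//       if (TiedTo[TiedTo[i]] != i)
//         return false;          // partner doesn't point back at us
//     return true;
//   }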
+ if (MO->isReg() && + !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { + if (MO->isDef() && !MCOI.isOptionalDef()) + report("Explicit operand marked as def", MO, MONum); + if (MO->isImplicit()) + report("Explicit operand marked as implicit", MO, MONum); + } + + int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO); + if (TiedTo != -1) { + if (!MO->isReg()) + report("Tied use must be a register", MO, MONum); + else if (!MO->isTied()) + report("Operand should be tied", MO, MONum); + else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) + report("Tied def doesn't match MCInstrDesc", MO, MONum); + } else if (MO->isReg() && MO->isTied()) + report("Explicit operand should not be tied", MO, MONum); + } else { + // ARM adds %reg0 operands to indicate predicates. We'll allow that. + if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg()) + report("Extra explicit operand on non-variadic instruction", MO, MONum); + } + + switch (MO->getType()) { + case MachineOperand::MO_Register: { + const unsigned Reg = MO->getReg(); + if (!Reg) + return; + if (MRI->tracksLiveness() && !MI->isDebugValue()) + checkLiveness(MO, MONum); + + // Verify the consistency of tied operands. + if (MO->isTied()) { + unsigned OtherIdx = MI->findTiedOperandIdx(MONum); + const MachineOperand &OtherMO = MI->getOperand(OtherIdx); + if (!OtherMO.isReg()) + report("Must be tied to a register", MO, MONum); + if (!OtherMO.isTied()) + report("Missing tie flags on tied operand", MO, MONum); + if (MI->findTiedOperandIdx(OtherIdx) != MONum) + report("Inconsistent tie links", MO, MONum); + if (MONum < MCID.getNumDefs()) { + if (OtherIdx < MCID.getNumOperands()) { + if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO)) + report("Explicit def tied to explicit use without tie constraint", + MO, MONum); + } else { + if (!OtherMO.isImplicit()) + report("Explicit def should be tied to implicit use", MO, MONum); + } + } + } + + // Verify two-address constraints after leaving SSA form. + unsigned DefIdx; + if (!MRI->isSSA() && MO->isUse() && + MI->isRegTiedToDefOperand(MONum, &DefIdx) && + Reg != MI->getOperand(DefIdx).getReg()) + report("Two-address instruction operands must be identical", MO, MONum); + + // Check register classes. + if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { + unsigned SubIdx = MO->getSubReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (SubIdx) { + report("Illegal subregister index for physical register", MO, MONum); + return; + } + if (const TargetRegisterClass *DRC = + TII->getRegClass(MCID, MONum, TRI, *MF)) { + if (!DRC->contains(Reg)) { + report("Illegal physical register for instruction", MO, MONum); + *OS << TRI->getName(Reg) << " is not a " + << DRC->getName() << " register.\n"; + } + } + } else { + // Virtual register. 
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (SubIdx) { + const TargetRegisterClass *SRC = + TRI->getSubClassWithSubReg(RC, SubIdx); + if (!SRC) { + report("Invalid subregister index for virtual register", MO, MONum); + *OS << "Register class " << RC->getName() + << " does not support subreg index " << SubIdx << "\n"; + return; + } + if (RC != SRC) { + report("Invalid register class for subregister index", MO, MONum); + *OS << "Register class " << RC->getName() + << " does not fully support subreg index " << SubIdx << "\n"; + return; + } + } + if (const TargetRegisterClass *DRC = + TII->getRegClass(MCID, MONum, TRI, *MF)) { + if (SubIdx) { + const TargetRegisterClass *SuperRC = + TRI->getLargestLegalSuperClass(RC); + if (!SuperRC) { + report("No largest legal super class exists.", MO, MONum); + return; + } + DRC = TRI->getMatchingSuperRegClass(SuperRC, DRC, SubIdx); + if (!DRC) { + report("No matching super-reg register class.", MO, MONum); + return; + } + } + if (!RC->hasSuperClassEq(DRC)) { + report("Illegal virtual register for instruction", MO, MONum); + *OS << "Expected a " << DRC->getName() << " register, but got a " + << RC->getName() << " register\n"; + } + } + } + } + break; + } + + case MachineOperand::MO_RegisterMask: + regMasks.push_back(MO->getRegMask()); + break; + + case MachineOperand::MO_MachineBasicBlock: + if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent())) + report("PHI operand is not in the CFG", MO, MONum); + break; + + case MachineOperand::MO_FrameIndex: + if (LiveStks && LiveStks->hasInterval(MO->getIndex()) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { + LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); + SlotIndex Idx = LiveInts->getInstructionIndex(MI); + if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { + report("Instruction loads from dead spill slot", MO, MONum); + *OS << "Live stack: " << LI << '\n'; + } + if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { + report("Instruction stores to dead spill slot", MO, MONum); + *OS << "Live stack: " << LI << '\n'; + } + } + break; + + default: + break; + } +} + +void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { + const MachineInstr *MI = MO->getParent(); + const unsigned Reg = MO->getReg(); + + // Both use and def operands can read a register. + if (MO->readsReg()) { + regsLiveInButUnused.erase(Reg); + + if (MO->isKill()) + addRegWithSubRegs(regsKilled, Reg); + + // Check that LiveVars knows this kill. + if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && + MO->isKill()) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end()) + report("Kill missing from LiveVariables", MO, MONum); + } + + // Check LiveInts liveness and kill. + if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI); + // Check the cached regunit intervals. 
+      if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
+        for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+          if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) {
+            LiveRangeQuery LRQ(*LI, UseIdx);
+            if (!LRQ.valueIn()) {
+              report("No live range at use", MO, MONum);
+              *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
+                  << ' ' << *LI << '\n';
+            }
+            if (MO->isKill() && !LRQ.isKill()) {
+              report("Live range continues after kill flag", MO, MONum);
+              *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n';
+            }
+          }
+        }
+      }
+
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        if (LiveInts->hasInterval(Reg)) {
+          // This is a virtual register interval.
+          const LiveInterval &LI = LiveInts->getInterval(Reg);
+          LiveRangeQuery LRQ(LI, UseIdx);
+          if (!LRQ.valueIn()) {
+            report("No live range at use", MO, MONum);
+            *OS << UseIdx << " is not live in " << LI << '\n';
+          }
+          // Check for extra kill flags.
+          // Note that we allow missing kill flags for now.
+          if (MO->isKill() && !LRQ.isKill()) {
+            report("Live range continues after kill flag", MO, MONum);
+            *OS << "Live range: " << LI << '\n';
+          }
+        } else {
+          report("Virtual register has no live interval", MO, MONum);
+        }
+      }
+    }
+
+    // Use of a dead register.
+    if (!regsLive.count(Reg)) {
+      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+        // Reserved registers may be used even when 'dead'.
+        if (!isReserved(Reg))
+          report("Using an undefined physical register", MO, MONum);
+      } else if (MRI->def_empty(Reg)) {
+        report("Reading virtual register without a def", MO, MONum);
+      } else {
+        BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+        // We don't know which virtual registers are live in, so only complain
+        // if vreg was killed in this MBB. Otherwise keep track of vregs that
+        // must be live in. PHI instructions are handled separately.
+        if (MInfo.regsKilled.count(Reg))
+          report("Using a killed virtual register", MO, MONum);
+        else if (!MI->isPHI())
+          MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+      }
+    }
+  }
+
+  if (MO->isDef()) {
+    // Register defined.
+    // TODO: verify that earlyclobber ops are not used.
+    if (MO->isDead())
+      addRegWithSubRegs(regsDead, Reg);
+    else
+      addRegWithSubRegs(regsDefined, Reg);
+
+    // Verify SSA form.
+    if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+        llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
+      report("Multiple virtual register defs in SSA form", MO, MONum);
+
+    // Check LiveInts for a live range, but only for virtual registers.
+    if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+        !LiveInts->isNotInMIMap(MI)) {
+      SlotIndex DefIdx = LiveInts->getInstructionIndex(MI);
+      DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
+      if (LiveInts->hasInterval(Reg)) {
+        const LiveInterval &LI = LiveInts->getInterval(Reg);
+        if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+          assert(VNI && "NULL valno is not allowed");
+          if (VNI->def != DefIdx) {
+            report("Inconsistent valno->def", MO, MONum);
+            *OS << "Valno " << VNI->id << " is not defined at "
+                << DefIdx << " in " << LI << '\n';
+          }
+        } else {
+          report("No live range at def", MO, MONum);
+          *OS << DefIdx << " is not live in " << LI << '\n';
+        }
+      } else {
+        report("Virtual register has no live interval", MO, MONum);
+      }
+    }
+  }
+}
+
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+}
+
+// This function gets called after visiting all instructions in a bundle. The
+// argument points to the bundle header.
+// Normal stand-alone instructions are also considered 'bundles', and this +// function is called for all of them. +void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + set_union(MInfo.regsKilled, regsKilled); + set_subtract(regsLive, regsKilled); regsKilled.clear(); + // Kill any masked registers. + while (!regMasks.empty()) { + const uint32_t *Mask = regMasks.pop_back_val(); + for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I) + if (TargetRegisterInfo::isPhysicalRegister(*I) && + MachineOperand::clobbersPhysReg(Mask, *I)) + regsDead.push_back(*I); + } + set_subtract(regsLive, regsDead); regsDead.clear(); + set_union(regsLive, regsDefined); regsDefined.clear(); +} + +void +MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { + MBBInfoMap[MBB].regsLiveOut = regsLive; + regsLive.clear(); + + if (Indexes) { + SlotIndex stop = Indexes->getMBBEndIdx(MBB); + if (!(stop > lastIndex)) { + report("Block ends before last instruction index", MBB); + *OS << "Block ends at " << stop + << " last instruction was at " << lastIndex << '\n'; + } + lastIndex = stop; + } +} + +// Calculate the largest possible vregsPassed sets. These are the registers that +// can pass through an MBB live, but may not be live every time. It is assumed +// that all vregsPassed sets are empty before the call. +void MachineVerifier::calcRegsPassed() { + // First push live-out regs to successors' vregsPassed. Remember the MBBs that + // have any vregsPassed. + SmallPtrSet<const MachineBasicBlock*, 8> todo; + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock &MBB(*MFI); + BBInfo &MInfo = MBBInfoMap[&MBB]; + if (!MInfo.reachable) + continue; + for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(), + SuE = MBB.succ_end(); SuI != SuE; ++SuI) { + BBInfo &SInfo = MBBInfoMap[*SuI]; + if (SInfo.addPassed(MInfo.regsLiveOut)) + todo.insert(*SuI); + } + } + + // Iteratively push vregsPassed to successors. This will converge to the same + // final state regardless of DenseSet iteration order. + while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), + SuE = MBB->succ_end(); SuI != SuE; ++SuI) { + if (*SuI == MBB) + continue; + BBInfo &SInfo = MBBInfoMap[*SuI]; + if (SInfo.addPassed(MInfo.vregsPassed)) + todo.insert(*SuI); + } + } +} + +// Calculate the set of virtual registers that must be passed through each basic +// block in order to satisfy the requirements of successor blocks. This is very +// similar to calcRegsPassed, only backwards. +void MachineVerifier::calcRegsRequired() { + // First push live-in regs to predecessors' vregsRequired. + SmallPtrSet<const MachineBasicBlock*, 8> todo; + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock &MBB(*MFI); + BBInfo &MInfo = MBBInfoMap[&MBB]; + for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(), + PrE = MBB.pred_end(); PrI != PrE; ++PrI) { + BBInfo &PInfo = MBBInfoMap[*PrI]; + if (PInfo.addRequired(MInfo.vregsLiveIn)) + todo.insert(*PrI); + } + } + + // Iteratively push vregsRequired to predecessors. This will converge to the + // same final state regardless of DenseSet iteration order. 
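// Both calcRegsPassed and the loop below are instances of the classic
// worklist fixpoint: re-queue a node whenever new facts reach it, and stop
// when no set grows. A stand-alone sketch over a toy graph (all names here
// are invented):
//
//   #include <set>
//   #include <vector>
//
//   // Edges[n] lists the neighbours facts flow to; Facts[n] accumulates.
//   void fixpoint(const std::vector<std::vector<int> > &Edges,
//                 std::vector<std::set<int> > &Facts,
//                 std::set<int> todo) {
//     while (!todo.empty()) {
//       int N = *todo.begin();
//       todo.erase(todo.begin());
//       for (size_t i = 0; i != Edges[N].size(); ++i) {
//         int M = Edges[N][i];
//         size_t Before = Facts[M].size();
//         Facts[M].insert(Facts[N].begin(), Facts[N].end());
//         if (Facts[M].size() != Before)   // something new arrived,
//           todo.insert(M);                // so M must be revisited
//       }
//     }
//   }
//
// Termination follows because every re-queue grows some finite set, and the
// result is the same fixpoint regardless of the order nodes are popped,
// which is why DenseSet iteration order doesn't matter above.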
+ while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + if (*PrI == MBB) + continue; + BBInfo &SInfo = MBBInfoMap[*PrI]; + if (SInfo.addRequired(MInfo.vregsRequired)) + todo.insert(*PrI); + } + } +} + +// Check PHI instructions at the beginning of MBB. It is assumed that +// calcRegsPassed has been run so BBInfo::isLiveOut is valid. +void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { + SmallPtrSet<const MachineBasicBlock*, 8> seen; + for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + seen.clear(); + + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { + unsigned Reg = BBI->getOperand(i).getReg(); + const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB(); + if (!Pre->isSuccessor(MBB)) + continue; + seen.insert(Pre); + BBInfo &PrInfo = MBBInfoMap[Pre]; + if (PrInfo.reachable && !PrInfo.isLiveOut(Reg)) + report("PHI operand is not live-out from predecessor", + &BBI->getOperand(i), i); + } + + // Did we see all predecessors? + for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + if (!seen.count(*PrI)) { + report("Missing PHI operand", BBI); + *OS << "BB#" << (*PrI)->getNumber() + << " is a predecessor according to the CFG.\n"; + } + } + } +} + +void MachineVerifier::visitMachineFunctionAfter() { + calcRegsPassed(); + + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + + // Skip unreachable MBBs. + if (!MInfo.reachable) + continue; + + checkPHIOps(MFI); + } + + // Now check liveness info if available + calcRegsRequired(); + + // Check for killed virtual registers that should be live out. 
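// The loop below is a set-intersection test: any register that is both
// "required live-out of this block" and "killed in this block" is an error.
// A compact stand-alone equivalent using std::set_intersection (the
// function name and types are illustrative):
//
//   #include <algorithm>
//   #include <iterator>
//   #include <set>
//   #include <vector>
//
//   std::vector<unsigned>
//   killedButRequired(const std::set<unsigned> &Required,
//                     const std::set<unsigned> &Killed) {
//     std::vector<unsigned> Bad;
//     std::set_intersection(Required.begin(), Required.end(),
//                           Killed.begin(), Killed.end(),
//                           std::back_inserter(Bad));
//     return Bad;   // each entry would trigger a verifier report
//   }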
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + if (MInfo.regsKilled.count(*I)) { + report("Virtual register killed in block, but needed live out.", MFI); + *OS << "Virtual register " << PrintReg(*I) + << " is used after the block.\n"; + } + } + + if (!MF->empty()) { + BBInfo &MInfo = MBBInfoMap[&MF->front()]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + report("Virtual register def doesn't dominate all uses.", + MRI->getVRegDef(*I)); + } + + if (LiveVars) + verifyLiveVariables(); + if (LiveInts) + verifyLiveIntervals(); +} + +void MachineVerifier::verifyLiveVariables() { + assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + + // Our vregsRequired should be identical to LiveVariables' AliveBlocks + if (MInfo.vregsRequired.count(Reg)) { + if (!VI.AliveBlocks.test(MFI->getNumber())) { + report("LiveVariables: Block missing from AliveBlocks", MFI); + *OS << "Virtual register " << PrintReg(Reg) + << " must be live through the block.\n"; + } + } else { + if (VI.AliveBlocks.test(MFI->getNumber())) { + report("LiveVariables: Block should not be in AliveBlocks", MFI); + *OS << "Virtual register " << PrintReg(Reg) + << " is not needed live through the block.\n"; + } + } + } + } +} + +void MachineVerifier::verifyLiveIntervals() { + assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + + // Spilling and splitting may leave unused registers around. Skip them. + if (MRI->reg_nodbg_empty(Reg)) + continue; + + if (!LiveInts->hasInterval(Reg)) { + report("Missing live interval for virtual register", MF); + *OS << PrintReg(Reg, TRI) << " still has defs or uses\n"; + continue; + } + + const LiveInterval &LI = LiveInts->getInterval(Reg); + assert(Reg == LI.reg && "Invalid reg to interval mapping"); + verifyLiveInterval(LI); + } + + // Verify all the cached regunit intervals. 
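+ // (Aside, not part of the original patch: a register unit is the leaf
+ // resource shared by overlapping physical registers; on X86, for example,
+ // AL and AX share a unit. Walking the units below therefore covers physreg
+ // liveness once per unit instead of once per alias pair.)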
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i)) + verifyLiveInterval(*LI); +} + +void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, + VNInfo *VNI) { + if (VNI->isUnused()) + return; + + const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); + + if (!DefVNI) { + report("Valno not live at def and not marked unused", MF, LI); + *OS << "Valno #" << VNI->id << '\n'; + return; + } + + if (DefVNI != VNI) { + report("Live range at def has different valno", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " where valno #" << DefVNI->id << " is live\n"; + return; + } + + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); + if (!MBB) { + report("Invalid definition index", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + return; + } + + if (VNI->isPHIDef()) { + if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { + report("PHIDef value is not defined at MBB start", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; + } + return; + } + + // Non-PHI def. + const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); + if (!MI) { + report("No instruction at def index", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + return; + } + + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + if (MOI->getReg() != LI.reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), LI.reg)) + continue; + } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } + + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } + + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. 
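+ // (Gloss, not part of the original patch: "USE"/"DEF" above is the older
+ // name for these slots. Each instruction number carries four SlotIndex
+ // slots, in order:
+ //
+ // Slot_Block (B) - block boundary, where live-in values enter
+ // Slot_EarlyClobber (e) - where early-clobber defs begin
+ // Slot_Register (r) - where normal defs begin
+ // Slot_Dead (d) - where dead defs end
+ //
+ // so the isEarlyClobber()/isRegister() checks below simply ask which slot
+ // VNI->def refers to.)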
+ if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } +} + +void +MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, + LiveInterval::const_iterator I) { + const VNInfo *VNI = I->valno; + assert(VNI && "Live range has no valno"); + + if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { + report("Foreign valno in live range", MF, LI); + *OS << *I << " has a bad valno\n"; + } + + if (VNI->isUnused()) { + report("Live range valno is marked unused", MF, LI); + *OS << *I << '\n'; + } + + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + if (!MBB) { + report("Bad start of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } + SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); + if (I->start != MBBStartIdx && I->start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB, LI); + *OS << *I << '\n'; + } + + const MachineBasicBlock *EndMBB = + LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + if (!EndMBB) { + report("Bad end of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } + + // No more checks for live-out segments. + if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + return; + + // RegUnit intervals are allowed dead phis. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() && + I->start == VNI->def && I->end == VNI->def.getDeadSlot()) + return; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB, LI); + *OS << *I << '\n'; + return; + } + + // The block slot must refer to a basic block boundary. + if (I->end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB, LI); + *OS << *I << '\n'; + } + + if (I->end.isDead()) { + // Segment ends on the dead slot. + // That means there must be a dead def. + if (!SlotIndex::isSameInstr(I->start, I->end)) { + report("Live segment ending at dead slot spans instructions", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (I->end.isEarlyClobber()) { + if (I+1 == LI.end() || (I+1)->start != I->end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. 
+ bool hasRead = false; + bool hasDeadDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != LI.reg) + continue; + if (MOI->readsReg()) + hasRead = true; + if (MOI->isDef() && MOI->isDead()) + hasDeadDef = true; + } + + if (I->end.isDead()) { + if (!hasDeadDef) { + report("Instruction doesn't have a dead def operand", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } else { + if (!hasRead) { + report("Instruction ending live range doesn't read the register", MI); + *OS << *I << " in " << LI << '\n'; + } + } + } + + // Now check all the basic blocks in this live segment. + MachineFunction::const_iterator MFI = MBB; + // Is this live range the beginning of a non-PHIDef VN? + if (I->start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + return; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LI, MFI)); + // We don't know how to track physregs into a landing pad. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && + MFI->isLandingPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } + + // Is VNI a PHI-def in the current block? + bool IsPHI = VNI->isPHIDef() && + VNI->def == LiveInts->getMBBStartIdx(MFI); + + // Check that VNI is live-out of all predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); + + // All predecessors must have a live-out value. + if (!PVNI) { + report("Register not marked live out of predecessor", *PI, LI); + *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " + << PEnd << '\n'; + continue; + } + + // Only PHI-defs can take different predecessor values. + if (!IsPHI && PVNI != VNI) { + report("Different value live out of predecessor", *PI, LI); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; + } + } + if (&*MFI == EndMBB) + break; + ++MFI; + } +} + +void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I!=E; ++I) + verifyLiveIntervalValue(LI, *I); + + for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) + verifyLiveIntervalSegment(LI, I); + + // Check the LI only has one connected component. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF, LI); + for (unsigned comp = 0; comp != NumComp; ++comp) { + *OS << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + *OS << ' ' << (*I)->id; + *OS << '\n'; + } + } + } +} diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp new file mode 100644 index 0000000..48db200 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/OcamlGC.cpp @@ -0,0 +1,37 @@ +//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering for the llvm.gc* intrinsics compatible with +// Objective Caml 3.10.0, which uses a liveness-accurate static stack map. +// +// The frametable emitter is in OcamlGCPrinter.cpp. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCStrategy.h" + +using namespace llvm; + +namespace { + class OcamlGC : public GCStrategy { + public: + OcamlGC(); + }; +} + +static GCRegistry::Add<OcamlGC> +X("ocaml", "ocaml 3.10-compatible GC"); + +void llvm::linkOcamlGC() { } + +OcamlGC::OcamlGC() { + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; +} diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp new file mode 100644 index 0000000..3982612 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -0,0 +1,193 @@ +//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass optimizes machine instruction PHIs to take advantage of +// opportunities created during DAG legalization. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "phi-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); +STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); + +namespace { + class OptimizePHIs : public MachineFunctionPass { + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification + OptimizePHIs() : MachineFunctionPass(ID) { + initializeOptimizePHIsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + typedef SmallPtrSet<MachineInstr*, 16> InstrSet; + typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator; + + bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, + InstrSet &PHIsInCycle); + bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle); + bool OptimizeBB(MachineBasicBlock &MBB); + }; +} + +char OptimizePHIs::ID = 0; +char &llvm::OptimizePHIsID = OptimizePHIs::ID; +INITIALIZE_PASS(OptimizePHIs, "opt-phis", + "Optimize machine instruction PHIs", false, false) + +bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { + MRI = &Fn.getRegInfo(); + TII = Fn.getTarget().getInstrInfo(); + + // Find dead PHI cycles and PHI cycles that can be replaced by a single + // value. InstCombine does these optimizations, but DAG legalization may + // introduce new opportunities, e.g., when i64 values are split up for + // 32-bit targets. 
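+ // (Illustration, not part of the original patch: after an i64 value is
+ // split for a 32-bit target, a loop-carried half can end up as a PHI whose
+ // sources all trace back, through copies and other PHIs, to one value:
+ //
+ // BB#1: %vreg2<def> = PHI %vreg0, <BB#0>, %vreg3, <BB#1>
+ // BB#1: %vreg3<def> = COPY %vreg2
+ //
+ // IsSingleValuePHICycle() proves that everything reduces to %vreg0, and
+ // uses of the PHI are rewritten to %vreg0 below.)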
+ bool Changed = false; + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= OptimizeBB(*I); + + return Changed; +} + +/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands +/// are copies of SingleValReg, possibly via copies through other PHIs. If +/// SingleValReg is zero on entry, it is set to the register with the single +/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that +/// have been scanned. +bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, + unsigned &SingleValReg, + InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + // Scan the PHI operands. + for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (SrcReg == DstReg) + continue; + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + + // Skip over register-to-register moves. + if (SrcMI && SrcMI->isCopy() && + !SrcMI->getOperand(0).getSubReg() && + !SrcMI->getOperand(1).getSubReg() && + TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) + SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); + if (!SrcMI) + return false; + + if (SrcMI->isPHI()) { + if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle)) + return false; + } else { + // Fail if there is more than one non-phi/non-move register. + if (SingleValReg != 0) + return false; + SingleValReg = SrcReg; + } + } + return true; +} + +/// IsDeadPHICycle - Check if the register defined by a PHI is only used by +/// other PHIs in a cycle. +bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "PHI destination is not a virtual register"); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg), + E = MRI->use_end(); I != E; ++I) { + MachineInstr *UseMI = &*I; + if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle)) + return false; + } + + return true; +} + +/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by +/// a single value. +bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator + MII = MBB.begin(), E = MBB.end(); MII != E; ) { + MachineInstr *MI = &*MII++; + if (!MI->isPHI()) + break; + + // Check for single-value PHI cycles. + unsigned SingleValReg = 0; + InstrSet PHIsInCycle; + if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && + SingleValReg != 0) { + unsigned OldReg = MI->getOperand(0).getReg(); + if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg))) + continue; + + MRI->replaceRegWith(OldReg, SingleValReg); + MI->eraseFromParent(); + ++NumPHICycles; + Changed = true; + continue; + } + + // Check for dead PHI cycles. 
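+ // (Illustration, not in the original patch: the dead case is a set of
+ // PHIs used only by one another, e.g.
+ //
+ // %vreg1<def> = PHI %vreg2, <BB#2>, ...
+ // %vreg2<def> = PHI %vreg1, <BB#1>, ...
+ //
+ // with no users outside the cycle; IsDeadPHICycle() detects this and the
+ // whole cycle is erased.)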
+ PHIsInCycle.clear(); + if (IsDeadPHICycle(MI, PHIsInCycle)) { + for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); + PI != PE; ++PI) { + MachineInstr *PhiMI = *PI; + if (&*MII == PhiMI) + ++MII; + PhiMI->eraseFromParent(); + } + ++NumDeadPHICycles; + Changed = true; + } + } + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp new file mode 100644 index 0000000..5584708 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -0,0 +1,644 @@ +//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass eliminates machine instruction PHI nodes by inserting copy +// instructions. This destroys SSA information, but is the desired input for +// some register allocators. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "phielim" +#include "llvm/CodeGen/Passes.h" +#include "PHIEliminationUtils.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +using namespace llvm; + +static cl::opt<bool> +DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), + cl::Hidden, cl::desc("Disable critical edge splitting " + "during PHI elimination")); + +static cl::opt<bool> +SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), + cl::Hidden, cl::desc("Split all critical edges during " + "PHI elimination")); + +namespace { + class PHIElimination : public MachineFunctionPass { + MachineRegisterInfo *MRI; // Machine register information + LiveVariables *LV; + LiveIntervals *LIS; + + public: + static char ID; // Pass identification, replacement for typeid + PHIElimination() : MachineFunctionPass(ID) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &Fn); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + private: + /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions + /// in predecessor basic blocks. + /// + bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); + void LowerPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt); + + /// analyzePHINodes - Gather information about the PHI nodes in + /// here. In particular, we want to map the number of uses of a virtual + /// register which is used in a PHI node. We map that to the BB the + /// vreg is coming from. This is used later to determine when the vreg + /// is killed in the BB. + /// + void analyzePHINodes(const MachineFunction& Fn); + + /// Split critical edges where necessary for good coalescer performance. 
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, + MachineLoopInfo *MLI); + + // These functions are temporary abstractions around LiveVariables and + // LiveIntervals, so they can go away when LiveVariables does. + bool isLiveIn(unsigned Reg, MachineBasicBlock *MBB); + bool isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB); + + typedef std::pair<unsigned, unsigned> BBVRegPair; + typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; + + VRegPHIUse VRegPHIUseCount; + + // Defs of PHI sources which are implicit_def. + SmallPtrSet<MachineInstr*, 4> ImpDefs; + + // Map reusable lowered PHI node -> incoming join register. + typedef DenseMap<MachineInstr*, unsigned, + MachineInstrExpressionTrait> LoweredPHIMap; + LoweredPHIMap LoweredPHIs; + }; +} + +STATISTIC(NumLowered, "Number of phis lowered"); +STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); +STATISTIC(NumReused, "Number of reused lowered phis"); + +char PHIElimination::ID = 0; +char& llvm::PHIEliminationID = PHIElimination::ID; + +INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", + false, false) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", false, false) + +void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { + MRI = &MF.getRegInfo(); + LV = getAnalysisIfAvailable<LiveVariables>(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); + + bool Changed = false; + + // This pass takes the function out of SSA form. + MRI->leaveSSA(); + + // Split critical edges to help the coalescer. This does not yet support + // updating LiveIntervals, so we disable it. + if (!DisableEdgeSplitting && (LV || LIS)) { + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= SplitPHIEdges(MF, *I, MLI); + } + + // Populate VRegPHIUseCount + analyzePHINodes(MF); + + // Eliminate PHI instructions by inserting copies into predecessor blocks. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= EliminatePHINodes(MF, *I); + + // Remove dead IMPLICIT_DEF instructions. + for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(), + E = ImpDefs.end(); I != E; ++I) { + MachineInstr *DefMI = *I; + unsigned DefReg = DefMI->getOperand(0).getReg(); + if (MRI->use_nodbg_empty(DefReg)) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(DefMI); + DefMI->eraseFromParent(); + } + } + + // Clean up the lowered PHI instructions. + for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); + I != E; ++I) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(I->first); + MF.DeleteMachineInstr(I->first); + } + + LoweredPHIs.clear(); + ImpDefs.clear(); + VRegPHIUseCount.clear(); + + return Changed; +} + +/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in +/// predecessor basic blocks. +/// +bool PHIElimination::EliminatePHINodes(MachineFunction &MF, + MachineBasicBlock &MBB) { + if (MBB.empty() || !MBB.front().isPHI()) + return false; // Quick exit for basic blocks without PHIs. 
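+
+ // (Illustration, not part of the original patch: conceptually,
+ //
+ // BB#2: %vreg5<def> = PHI %vreg3, <BB#0>, %vreg4, <BB#1>
+ //
+ // is lowered by LowerPHINode() below into a fresh "incoming" register with
+ // a copy at the bottom of each predecessor and one copy after the
+ // remaining PHIs:
+ //
+ // BB#0: %vreg6<def> = COPY %vreg3
+ // BB#1: %vreg6<def> = COPY %vreg4
+ // BB#2: %vreg5<def> = COPY %vreg6
+ // )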
+
+ // Get an iterator to the first instruction after the last PHI node (this may
+ // also be the end of the basic block).
+ MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
+
+ while (MBB.front().isPHI())
+ LowerPHINode(MBB, AfterPHIsIt);
+
+ return true;
+}
+
+/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs.
+/// This includes registers with no defs.
+static bool isImplicitlyDefined(unsigned VirtReg,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg),
+ DE = MRI->def_end(); DI != DE; ++DI)
+ if (!DI->isImplicitDef())
+ return false;
+ return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+ const MachineRegisterInfo *MRI) {
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI))
+ return false;
+ return true;
+}
+
+
+/// LowerPHINode - Lower the PHI node at the top of the specified block.
+///
+void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ ++NumLowered;
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+ unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+ assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
+ bool isDead = MPhi->getOperand(0).isDead();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ unsigned IncomingReg = 0;
+ bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
+
+ // Insert a register-to-register copy at the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ if (isSourceDefinedByImplicitDef(MPhi, MRI))
+ // If all sources of a PHI node are implicit_def, just emit an
+ // implicit_def instead of a copy.
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ else {
+ // Can we reuse an earlier PHI node? This only happens for critical edges,
+ // typically those created by tail duplication.
+ unsigned &entry = LoweredPHIs[MPhi];
+ if (entry) {
+ // An identical PHI node was already lowered. Reuse the incoming register.
+ IncomingReg = entry;
+ reusedIncoming = true;
+ ++NumReused;
+ DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
+ } else {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+ entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ }
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), DestReg)
+ .addReg(IncomingReg);
+ }
+
+ // Update live variable information if there is any.
+ if (LV) {
+ MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+ if (IncomingReg) {
+ LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
+ // Increment use count of the newly created virtual register.
+ LV->setPHIJoin(IncomingReg);
+
+ // When we are reusing the incoming register, it may already have been
+ // killed in this block. The old kill will also have been inserted at
+ // AfterPHIsIt, so it appears before the current PHICopy.
+ if (reusedIncoming)
+ if (MachineInstr *OldKill = VI.findKill(&MBB)) {
+ DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
+ DEBUG(MBB.dump());
+ }
+
+ // Add information to LiveVariables to know that the incoming value is
+ // killed. Note that because the value is defined in several places (once
+ // for each incoming block), the "def" block and instruction fields of
+ // the VarInfo are not filled in.
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ }
+
+ // Since we are going to be deleting the PHI node, if it is the last use of
+ // any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ LV->removeVirtualRegistersKilled(MPhi);
+
+ // If the result is dead, update LV.
+ if (isDead) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegisterDead(DestReg, MPhi);
+ }
+ }
+
+ // Update LiveIntervals for the new copy or implicit def.
+ if (LIS) {
+ MachineInstr *NewInstr = prior(AfterPHIsIt);
+ SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr);
+
+ SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
+ if (IncomingReg) {
+ // Add the region from the beginning of MBB to the copy instruction to
+ // IncomingReg's live interval.
+ LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg);
+ VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
+ if (!IncomingVNI)
+ IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
+ LIS->getVNInfoAllocator());
+ IncomingLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getRegSlot(),
+ IncomingVNI));
+ }
+
+ LiveInterval &DestLI = LIS->getInterval(DestReg);
+ assert(DestLI.begin() != DestLI.end() &&
+ "PHIs should have nonempty LiveIntervals.");
+ if (DestLI.endIndex().isDead()) {
+ // A dead PHI's live range begins and ends at the start of the MBB, but
+ // the lowered copy, which will still be dead, needs to begin and end at
+ // the copy instruction.
+ VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
+ assert(OrigDestVNI && "PHI destination should be live at block entry.");
+ DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot());
+ DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
+ LIS->getVNInfoAllocator());
+ DestLI.removeValNo(OrigDestVNI);
+ } else {
+ // Otherwise, remove the region from the beginning of MBB to the copy
+ // instruction from DestReg's live interval.
+ DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
+ assert(DestVNI && "PHI destination should be live at its definition.");
+ DestVNI->def = DestCopyIndex.getRegSlot();
+ }
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into the
+ // IncomingReg register in the corresponding predecessor basic block.
+ SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ for (int i = NumSrcs - 1; i >= 0; --i) {
+ unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+ bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
+ isImplicitlyDefined(SrcReg, MRI);
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ // Get the MachineBasicBlock equivalent of the BasicBlock that is the source
+ // of this PHI operand.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the same
+ // basic block.
+ if (!MBBsInsertedInto.insert(&opBlock))
+ continue; // If the copy has already been emitted, we're done.
+
+ // Find a safe location to insert the copy; this may be the first terminator
+ // in the block (or end()).
+ MachineBasicBlock::iterator InsertPos =
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
+
+ // Insert the copy.
+ MachineInstr *NewSrcInstr = 0;
+ if (!reusedIncoming && IncomingReg) {
+ if (SrcUndef) {
+ // The source register is undefined, so there is no need for a real
+ // COPY, but we still need to ensure joint dominance by defs.
+ // Insert an IMPLICIT_DEF instruction.
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF),
+ IncomingReg);
+
+ // Clean up the old implicit-def, if there even was one.
+ if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
+ if (DefMI->isImplicitDef())
+ ImpDefs.insert(DefMI);
+ } else {
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg)
+ .addReg(SrcReg, 0, SrcSubReg);
+ }
+ }
+
+ // We only need to update the LiveVariables kill of SrcReg if this was the
+ // last PHI use of SrcReg to be lowered on this CFG edge and it is not live
+ // out of the predecessor. We can also ignore undef sources.
+ if (LV && !SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] &&
+ !LV->isLiveOut(SrcReg, opBlock)) {
+ // We want to be able to insert a kill of the register if this PHI (aka,
+ // the copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value
+ // is live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+
+ // Okay, if we now know that the value is not live out of the block, we
+ // can add a kill marker in this block saying that it kills the incoming
+ // value!
+
+ // In our final twist, we have to decide which instruction kills the
+ // register. In most cases this is the copy, however, terminator
+ // instructions at the end of the block may also use the value. In this
+ // case, we should mark the last such terminator as being the killing
+ // instruction, not the copy.
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = FirstTerm; + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) + break; + } + } else { + // We just inserted this copy. + KillInst = prior(InsertPos); + } + } + assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); + + // Finally, mark it killed. + LV->addVirtualRegisterKilled(SrcReg, KillInst); + + // This vreg no longer lives all of the way through opBlock. + unsigned opBlockNum = opBlock.getNumber(); + LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum); + } + + if (LIS) { + if (NewSrcInstr) { + LIS->InsertMachineInstrInMaps(NewSrcInstr); + LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr); + } + + if (!SrcUndef && + !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) { + LiveInterval &SrcLI = LIS->getInterval(SrcReg); + + bool isLiveOut = false; + for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), + SE = opBlock.succ_end(); SI != SE; ++SI) { + SlotIndex startIdx = LIS->getMBBStartIdx(*SI); + VNInfo *VNI = SrcLI.getVNInfoAt(startIdx); + + // Definitions by other PHIs are not truly live-in for our purposes. + if (VNI && VNI->def != startIdx) { + isLiveOut = true; + break; + } + } + + if (!isLiveOut) { + MachineBasicBlock::iterator KillInst = opBlock.end(); + MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); + for (MachineBasicBlock::iterator Term = FirstTerm; + Term != opBlock.end(); ++Term) { + if (Term->readsRegister(SrcReg)) + KillInst = Term; + } + + if (KillInst == opBlock.end()) { + // No terminator uses the register. + + if (reusedIncoming || !IncomingReg) { + // We may have to rewind a bit if we didn't just insert a copy. + KillInst = FirstTerm; + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) + break; + } + } else { + // We just inserted this copy. + KillInst = prior(InsertPos); + } + } + assert(KillInst->readsRegister(SrcReg) && + "Cannot find kill instruction"); + + SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst); + SrcLI.removeRange(LastUseIndex.getRegSlot(), + LIS->getMBBEndIdx(&opBlock)); + } + } + } + } + + // Really delete the PHI instruction now, if it is not in the LoweredPHIs map. + if (reusedIncoming || !IncomingReg) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(MPhi); + MF.DeleteMachineInstr(MPhi); + } +} + +/// analyzePHINodes - Gather information about the PHI nodes in here. In +/// particular, we want to map the number of uses of a virtual register which is +/// used in a PHI node. We map that to the BB the vreg is coming from. This is +/// used later to determine when the vreg is killed in the BB. +/// +void PHIElimination::analyzePHINodes(const MachineFunction& MF) { + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) + for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) + ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(), + BBI->getOperand(i).getReg())]; +} + +bool PHIElimination::SplitPHIEdges(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineLoopInfo *MLI) { + if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) + return false; // Quick exit for basic blocks without PHIs. + + const MachineLoop *CurLoop = MLI ? 
MLI->getLoopFor(&MBB) : 0;
+ bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // Is there a critical edge from PreMBB to MBB?
+ if (PreMBB->succ_size() == 1)
+ continue;
+
+ // Avoid splitting backedges of loops. It would introduce small
+ // out-of-line blocks into the loop which is very bad for code placement.
+ if (PreMBB == &MBB && !SplitAllCriticalEdges)
+ continue;
+ const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
+ if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges)
+ continue;
+
+ // LV doesn't consider a phi use live-out, so isLiveOut only returns true
+ // when the source register is live-out for some other reason than a phi
+ // use. That means the copy we will insert in PreMBB won't be a kill, and
+ // there is a risk it may not be coalesced away.
+ //
+ // If the copy would be a kill, there is no need to split the edge.
+ if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges)
+ continue;
+
+ DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
+ << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
+ << ": " << *BBI);
+
+ // If Reg is not live-in to MBB, it means it must be live-in to some
+ // other PreMBB successor, and we can avoid the interference by splitting
+ // the edge.
+ //
+ // If Reg *is* live-in to MBB, the interference is inevitable and a copy
+ // is likely to be left after coalescing. If we are looking at a loop
+ // exiting edge, split it so we won't insert code in the loop, otherwise
+ // don't bother.
+ bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges;
+
+ // Check for a loop exiting edge.
+ if (!ShouldSplit && CurLoop != PreLoop) {
+ DEBUG({
+ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
+ if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
+ if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+ });
+ // This edge could be entering a loop, exiting a loop, or it could be
+ // both: Jumping directly from one loop to the header of a sibling
+ // loop.
+ // Split unless this edge is entering CurLoop from an outer loop.
+ ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
+ }
+ if (!ShouldSplit)
+ continue;
+ if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
+ DEBUG(dbgs() << "Failed to split critical edge.\n");
+ continue;
+ }
+ Changed = true;
+ ++NumCriticalEdgesSplit;
+ }
+ }
+ return Changed;
+}
+
+bool PHIElimination::isLiveIn(unsigned Reg, MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveIn() requires either LiveVariables or LiveIntervals");
+ if (LIS)
+ return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB);
+ else
+ return LV->isLiveIn(Reg, *MBB);
+}
+
+bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
+ // LiveVariables considers uses in PHIs to be in the predecessor basic block,
+ // so that a register used only in a PHI is not live out of the block. In
+ // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than
+ // in the predecessor basic block, so that a register used only in a PHI is live
+ // out of the block.
+ if (LIS) {
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (LI.liveAt(LIS->getMBBStartIdx(*SI)))
+ return true;
+ }
+ return false;
+ } else {
+ return LV->isLiveOut(Reg, *MBB);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 0000000..e1b56e9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,61 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+// SrcReg when following the CFG edge to SuccMBB. This needs to be after any
+// def of SrcReg, but before any subsequent point where control flow might
+// jump out of the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB->empty())
+ return MBB->begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB->isLandingPad())
+ return MBB->getFirstTerminator();
+
+ // Discover any defs/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
+ RE = MRI.reg_end(); RI != RE; ++RI) {
+ MachineInstr* DefUseMI = &*RI;
+ if (DefUseMI->getParent() == MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No defs or uses. Insert the copy at the start of the basic block.
+ InsertPoint = MBB->begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the def/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last def/use.
+ InsertPoint = MBB->end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+ // Make sure the copy goes after any phi nodes, however.
+ return MBB->SkipPHIsAndLabels(InsertPoint);
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 0000000..9ac47fb4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H +#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H + +#include "llvm/CodeGen/MachineBasicBlock.h" + +namespace llvm { + /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from + /// SrcReg when following the CFG edge to SuccMBB. This needs to be after + /// any def of SrcReg, but before any subsequent point where control flow + /// might jump out of the basic block. + MachineBasicBlock::iterator + findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, + unsigned SrcReg); +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp new file mode 100644 index 0000000..1af65c8 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -0,0 +1,746 @@ +//===-- Passes.cpp - Target independent code generation passes ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines interfaces to access the target independent code +// generation passes provided by the LLVM backend. +// +//===---------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, + cl::desc("Disable Post Regalloc")); +static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, + cl::desc("Disable branch folding")); +static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, + cl::desc("Disable tail duplication")); +static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, + cl::desc("Disable pre-register allocation tail duplication")); +static cl::opt<bool> DisableBlockPlacement("disable-block-placement", + cl::Hidden, cl::desc("Disable probability-driven block placement")); +static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats", + cl::Hidden, cl::desc("Collect probability-driven block placement stats")); +static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, + cl::desc("Disable Stack Slot Coloring")); +static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, + cl::desc("Disable Machine Dead Code Elimination")); +static cl::opt<bool> DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden, + cl::desc("Disable Early If-conversion")); +static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, + cl::desc("Disable Machine Common Subexpression Elimination")); +static cl::opt<cl::boolOrDefault> +OptimizeRegAlloc("optimize-regalloc", cl::Hidden, + cl::desc("Enable optimized register allocation compilation 
path.")); +static cl::opt<cl::boolOrDefault> +EnableMachineSched("enable-misched", cl::Hidden, + cl::desc("Enable the machine instruction scheduling pass.")); +static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden, + cl::desc("Use strong PHI elimination.")); +static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", + cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, + cl::desc("Disable Machine Sinking")); +static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, + cl::desc("Disable Loop Strength Reduction Pass")); +static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden, + cl::desc("Disable Codegen Prepare")); +static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden, + cl::desc("Disable Copy Propagation pass")); +static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, + cl::desc("Print LLVM IR produced by the loop-reduce pass")); +static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, + cl::desc("Print LLVM IR input to isel pass")); +static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, + cl::desc("Dump garbage collector data")); +static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, + cl::desc("Verify generated machine code"), + cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); +static cl::opt<std::string> +PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, + cl::desc("Print machine instrs"), + cl::value_desc("pass-name"), cl::init("option-unspecified")); + +// Experimental option to run live interval analysis early. +static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, + cl::desc("Run live interval analysis earlier in the pipeline")); + +/// Allow standard passes to be disabled by command line options. This supports +/// simple binary flags that either suppress the pass or do nothing. +/// i.e. -disable-mypass=false has no effect. +/// These should be converted to boolOrDefault in order to use applyOverride. +static AnalysisID applyDisable(AnalysisID PassID, bool Override) { + if (Override) + return 0; + return PassID; +} + +/// Allow Pass selection to be overriden by command line options. This supports +/// flags with ternary conditions. TargetID is passed through by default. The +/// pass is suppressed when the option is false. When the option is true, the +/// StandardID is selected if the target provides no default. +static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, + AnalysisID StandardID) { + switch (Override) { + case cl::BOU_UNSET: + return TargetID; + case cl::BOU_TRUE: + if (TargetID) + return TargetID; + if (StandardID == 0) + report_fatal_error("Target cannot enable pass"); + return StandardID; + case cl::BOU_FALSE: + return 0; + } + llvm_unreachable("Invalid command line option state"); +} + +/// Allow standard passes to be disabled by the command line, regardless of who +/// is adding the pass. +/// +/// StandardID is the pass identified in the standard pass pipeline and provided +/// to addPass(). It may be a target-specific ID in the case that the target +/// directly adds its own pass, but in that case we harmlessly fall through. +/// +/// TargetID is the pass that the target has configured to override StandardID. +/// +/// StandardID may be a pseudo ID. In that case TargetID is the name of the real +/// pass to run. This allows multiple options to control a single pass depending +/// on where in the pipeline that pass is added. 
+static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
+ if (StandardID == &PostRASchedulerID)
+ return applyDisable(TargetID, DisablePostRA);
+
+ if (StandardID == &BranchFolderPassID)
+ return applyDisable(TargetID, DisableBranchFold);
+
+ if (StandardID == &TailDuplicateID)
+ return applyDisable(TargetID, DisableTailDuplicate);
+
+ if (StandardID == &TargetPassConfig::EarlyTailDuplicateID)
+ return applyDisable(TargetID, DisableEarlyTailDup);
+
+ if (StandardID == &MachineBlockPlacementID)
+ return applyDisable(TargetID, DisableBlockPlacement);
+
+ if (StandardID == &StackSlotColoringID)
+ return applyDisable(TargetID, DisableSSC);
+
+ if (StandardID == &DeadMachineInstructionElimID)
+ return applyDisable(TargetID, DisableMachineDCE);
+
+ if (StandardID == &EarlyIfConverterID)
+ return applyDisable(TargetID, DisableEarlyIfConversion);
+
+ if (StandardID == &MachineLICMID)
+ return applyDisable(TargetID, DisableMachineLICM);
+
+ if (StandardID == &MachineCSEID)
+ return applyDisable(TargetID, DisableMachineCSE);
+
+ if (StandardID == &MachineSchedulerID)
+ return applyOverride(TargetID, EnableMachineSched, StandardID);
+
+ if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
+ return applyDisable(TargetID, DisablePostRAMachineLICM);
+
+ if (StandardID == &MachineSinkingID)
+ return applyDisable(TargetID, DisableMachineSink);
+
+ if (StandardID == &MachineCopyPropagationID)
+ return applyDisable(TargetID, DisableCopyProp);
+
+ return TargetID;
+}
+
+//===---------------------------------------------------------------------===//
+/// TargetPassConfig
+//===---------------------------------------------------------------------===//
+
+INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
+ "Target Pass Configuration", false, false)
+char TargetPassConfig::ID = 0;
+
+// Pseudo Pass IDs.
+char TargetPassConfig::EarlyTailDuplicateID = 0;
+char TargetPassConfig::PostRAMachineLICMID = 0;
+
+namespace llvm {
+class PassConfigImpl {
+public:
+ // List of passes explicitly substituted by this target. Normally this is
+ // empty, but it is a convenient way to suppress or replace specific passes
+ // that are part of a standard pass pipeline without overriding the entire
+ // pipeline. This mechanism allows target options to inherit a standard pass's
+ // user interface. For example, a target may disable a standard pass by
+ // default by substituting a pass ID of zero, and the user may still enable
+ // that standard pass with an explicit command line option.
+ DenseMap<AnalysisID,AnalysisID> TargetPasses;
+
+ /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
+ /// is inserted after each instance of the first one.
+ SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
+};
+} // namespace llvm
+
+// Out of line virtual method.
+TargetPassConfig::~TargetPassConfig() {
+ delete Impl;
+}
+
+// Out of line constructor provides default values for pass options and
+// registers all common codegen passes.
+TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
+ : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0),
+ Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false),
+ DisableVerify(false),
+ EnableTailMerge(true) {
+
+ Impl = new PassConfigImpl();
+
+ // Register all target independent codegen passes to activate their PassIDs,
+ // including this pass itself.
+ initializeCodeGen(*PassRegistry::getPassRegistry());
+
+ // Substitute Pseudo Pass IDs for real ones.
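+ //
+ // (Note, not part of the original patch: a pseudo ID such as
+ // EarlyTailDuplicateID never names a registered pass; the substitution
+ // below redirects it to the real TailDuplicate pass, while overridePass()
+ // above still lets -disable-early-taildup control this early instance
+ // independently of the late one.)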
+ substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); + substitutePass(&PostRAMachineLICMID, &MachineLICMID); + + // Temporarily disable experimental passes. + const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); + if (!ST.enableMachineScheduler()) + disablePass(&MachineSchedulerID); +} + +/// Insert InsertedPassID pass after TargetPassID. +void TargetPassConfig::insertPass(AnalysisID TargetPassID, + AnalysisID InsertedPassID) { + assert(TargetPassID != InsertedPassID && "Insert a pass after itself!"); + std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID); + Impl->InsertedPasses.push_back(P); +} + +/// createPassConfig - Create a pass configuration object to be used by +/// addPassToEmitX methods for generating a pipeline of CodeGen passes. +/// +/// Targets may override this to extend TargetPassConfig. +TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { + return new TargetPassConfig(this, PM); +} + +TargetPassConfig::TargetPassConfig() + : ImmutablePass(ID), PM(0) { + llvm_unreachable("TargetPassConfig should not be constructed on-the-fly"); +} + +// Helper to verify the analysis is really immutable. +void TargetPassConfig::setOpt(bool &Opt, bool Val) { + assert(!Initialized && "PassConfig is immutable"); + Opt = Val; +} + +void TargetPassConfig::substitutePass(AnalysisID StandardID, + AnalysisID TargetID) { + Impl->TargetPasses[StandardID] = TargetID; +} + +AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { + DenseMap<AnalysisID, AnalysisID>::const_iterator + I = Impl->TargetPasses.find(ID); + if (I == Impl->TargetPasses.end()) + return ID; + return I->second; +} + +/// Add a pass to the PassManager if that pass is supposed to be run. If the +/// Started/Stopped flags indicate either that the compilation should start at +/// a later pass or that it should stop after an earlier pass, then do not add +/// the pass. Finally, compare the current pass against the StartAfter +/// and StopAfter options and change the Started/Stopped flags accordingly. +void TargetPassConfig::addPass(Pass *P) { + assert(!Initialized && "PassConfig is immutable"); + + // Cache the Pass ID here in case the pass manager finds this pass is + // redundant with ones already scheduled / available, and deletes it. + // Fundamentally, once we add the pass to the manager, we no longer own it + // and shouldn't reference it. + AnalysisID PassID = P->getPassID(); + + if (Started && !Stopped) + PM->add(P); + if (StopAfter == PassID) + Stopped = true; + if (StartAfter == PassID) + Started = true; + if (Stopped && !Started) + report_fatal_error("Cannot stop compilation after pass that is not run"); +} + +/// Add a CodeGen pass at this point in the pipeline after checking for target +/// and command line overrides. +AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { + AnalysisID TargetID = getPassSubstitution(PassID); + AnalysisID FinalID = overridePass(PassID, TargetID); + if (FinalID == 0) + return FinalID; + + Pass *P = Pass::createPass(FinalID); + if (!P) + llvm_unreachable("Pass ID not registered"); + addPass(P); + // Add the passes after the pass P if there is any. 
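+ //
+ // (Illustration, not part of the original patch: this loop realizes
+ // insertPass(); e.g. a target that called
+ //
+ // insertPass(&DeadMachineInstructionElimID, &MyTargetFixupID);
+ //
+ // with MyTargetFixupID a hypothetical target pass would get that pass
+ // scheduled after every instance of machine DCE.)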
+  for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator
+         I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
+       I != E; ++I) {
+    if ((*I).first == PassID) {
+      assert((*I).second && "Illegal Pass ID!");
+      Pass *NP = Pass::createPass((*I).second);
+      assert(NP && "Pass ID not registered");
+      addPass(NP);
+    }
+  }
+  return FinalID;
+}
+
+void TargetPassConfig::printAndVerify(const char *Banner) {
+  if (TM->shouldPrintMachineCode())
+    addPass(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+  if (VerifyMachineCode)
+    addPass(createMachineVerifierPass(Banner));
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR transforms
+/// following machine independent optimization.
+void TargetPassConfig::addIRPasses() {
+  // Basic AliasAnalysis support.
+  // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+  // BasicAliasAnalysis wins if they disagree. This is intended to help
+  // support "obvious" type-punning idioms.
+  addPass(createTypeBasedAliasAnalysisPass());
+  addPass(createBasicAliasAnalysisPass());
+
+  // Before running any passes, run the verifier to determine if the input
+  // coming from the front-end and/or optimizer is valid.
+  if (!DisableVerify)
+    addPass(createVerifierPass());
+
+  // Run loop strength reduction before anything else.
+  if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
+    addPass(createLoopStrengthReducePass());
+    if (PrintLSR)
+      addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+  }
+
+  addPass(createGCLoweringPass());
+
+  // Make sure that no unreachable blocks are instruction selected.
+  addPass(createUnreachableBlockEliminationPass());
+}
+
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+void TargetPassConfig::addPassesToHandleExceptions() {
+  switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+  case ExceptionHandling::SjLj:
+    // SjLj piggy-backs on dwarf for this bit; the cleanups done below apply
+    // to both. Dwarf EH prepare needs to be run after SjLj prepare.
+    // Otherwise, catch info can get misplaced when a selector ends up more
+    // than one block removed from the parent invoke(s). This could happen
+    // when a landing pad is shared by multiple invokes and is also a target
+    // of a normal edge from elsewhere.
+    addPass(createSjLjEHPreparePass(TM->getTargetLowering()));
+    // FALLTHROUGH
+  case ExceptionHandling::DwarfCFI:
+  case ExceptionHandling::ARM:
+  case ExceptionHandling::Win64:
+    addPass(createDwarfEHPass(TM));
+    break;
+  case ExceptionHandling::None:
+    addPass(createLowerInvokePass(TM->getTargetLowering()));
+
+    // The lower invoke pass may create unreachable code. Remove it.
+    addPass(createUnreachableBlockEliminationPass());
+    break;
+  }
+}
+
+/// Add a pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+void TargetPassConfig::addCodeGenPrepare() {
+  if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
+    addPass(createCodeGenPreparePass(getTargetLowering()));
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
+  addPass(createStackProtectorPass(getTargetLowering()));
+
+  addPreISel();
+
+  if (PrintISelInput)
+    addPass(createPrintFunctionPass("\n\n"
+                                    "*** Final LLVM Code input to ISel ***\n",
+                                    &dbgs()));
+
+  // All passes which modify the LLVM IR are now complete; run the verifier
+  // to ensure that the IR is valid.
+  if (!DisableVerify)
+    addPass(createVerifierPass());
+}
+
+/// Add the complete set of target-independent postISel code generator passes.
+///
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
+///
+/// Any TargetPassConfig::addXX routine may be overridden by the Target. The
+/// addPre/Post methods with empty header implementations allow injecting
+/// target-specific fixups just before or after major stages. Additionally,
+/// targets have the flexibility to change pass order within a stage by
+/// overriding the default implementation of the add%Stage routines below. Each
+/// technique has maintainability tradeoffs because alternate pass orders are
+/// not well supported. addPre/Post works better if the target pass is easily
+/// tied to a common pass. But if it has subtle dependencies on multiple passes,
+/// the target should override the stage instead.
+///
+/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
+/// before/after any target-independent pass. But it's currently overkill.
+void TargetPassConfig::addMachinePasses() {
+  // Insert a machine instr printer pass after the specified pass.
+  // If -print-machineinstrs is specified, print machineinstrs after all passes.
+  if (StringRef(PrintMachineInstrs.getValue()).equals(""))
+    TM->Options.PrintMachineCode = true;
+  else if (!StringRef(PrintMachineInstrs.getValue())
+           .equals("option-unspecified")) {
+    const PassRegistry *PR = PassRegistry::getPassRegistry();
+    const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
+    const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs"));
+    assert (TPI && IPI && "Pass ID not registered!");
+    const char *TID = (const char *)(TPI->getTypeInfo());
+    const char *IID = (const char *)(IPI->getTypeInfo());
+    insertPass(TID, IID);
+  }
+
+  // Print the instruction selected machine code...
+  printAndVerify("After Instruction Selection");
+
+  // Expand pseudo-instructions emitted by ISel.
+  if (addPass(&ExpandISelPseudosID))
+    printAndVerify("After ExpandISelPseudos");
+
+  // Add passes that optimize machine instructions in SSA form.
+  if (getOptLevel() != CodeGenOpt::None) {
+    addMachineSSAOptimization();
+  } else {
+    // If the target requests it, assign local variables to stack slots
+    // relative to one another and simplify frame index references where
+    // possible.
+    addPass(&LocalStackSlotAllocationID);
+  }
+
+  // Run pre-ra passes.
+  if (addPreRegAlloc())
+    printAndVerify("After PreRegAlloc passes");
+
+  // Run register allocation and passes that are tightly coupled with it,
+  // including phi elimination and scheduling.
+  if (getOptimizeRegAlloc())
+    addOptimizedRegAlloc(createRegAllocPass(true));
+  else
+    addFastRegAlloc(createRegAllocPass(false));
+
+  // Run post-ra passes.
+  if (addPostRegAlloc())
+    printAndVerify("After PostRegAlloc passes");
+
+  // Insert prolog/epilog code. Eliminate abstract frame index references...
+  addPass(&PrologEpilogCodeInserterID);
+  printAndVerify("After PrologEpilogCodeInserter");
+
+  /// Add passes that optimize machine instructions after register allocation.
+  if (getOptLevel() != CodeGenOpt::None)
+    addMachineLateOptimization();
+
+  // Expand pseudo instructions before second scheduling pass.
+  addPass(&ExpandPostRAPseudosID);
+  printAndVerify("After ExpandPostRAPseudos");
+
+  // Run pre-sched2 passes.
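+  // (addPreSched2, like the other addPre*/addPost* hooks used in this
+  // function, has an empty default; a target override is usually just a few
+  // addPass calls. A hypothetical sketch, using the standard if-conversion
+  // pass ID and an invented MyTargetPassConfig class:
+  //
+  //   bool MyTargetPassConfig::addPreSched2() {
+  //     addPass(&IfConverterID); // if-convert before the second scheduler
+  //     return true;             // true requests a print/verify point here
+  //   }
+  // )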
+ if (addPreSched2()) + printAndVerify("After PreSched2 passes"); + + // Second pass scheduler. + if (getOptLevel() != CodeGenOpt::None) { + addPass(&PostRASchedulerID); + printAndVerify("After PostRAScheduler"); + } + + // GC + if (addGCPasses()) { + if (PrintGCInfo) + addPass(createGCInfoPrinter(dbgs())); + } + + // Basic block placement. + if (getOptLevel() != CodeGenOpt::None) + addBlockPlacement(); + + if (addPreEmitPass()) + printAndVerify("After PreEmit passes"); +} + +/// Add passes that optimize machine instructions in SSA form. +void TargetPassConfig::addMachineSSAOptimization() { + // Pre-ra tail duplication. + if (addPass(&EarlyTailDuplicateID)) + printAndVerify("After Pre-RegAlloc TailDuplicate"); + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + addPass(&OptimizePHIsID); + + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(&StackColoringID); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(&LocalStackSlotAllocationID); + + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + addPass(&DeadMachineInstructionElimID); + printAndVerify("After codegen DCE pass"); + + // Allow targets to insert passes that improve instruction level parallelism, + // like if-conversion. Such passes will typically need dominator trees and + // loop info, just like LICM and CSE below. + if (addILPOpts()) + printAndVerify("After ILP optimizations"); + + addPass(&MachineLICMID); + addPass(&MachineCSEID); + addPass(&MachineSinkingID); + printAndVerify("After Machine LICM, CSE and Sinking passes"); + + addPass(&PeepholeOptimizerID); + printAndVerify("After codegen peephole optimization pass"); +} + +//===---------------------------------------------------------------------===// +/// Register Allocation Pass Configuration +//===---------------------------------------------------------------------===// + +bool TargetPassConfig::getOptimizeRegAlloc() const { + switch (OptimizeRegAlloc) { + case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None; + case cl::BOU_TRUE: return true; + case cl::BOU_FALSE: return false; + } + llvm_unreachable("Invalid optimize-regalloc state"); +} + +/// RegisterRegAlloc's global Registry tracks allocator registration. +MachinePassRegistry RegisterRegAlloc::Registry; + +/// A dummy default pass factory indicates whether the register allocator is +/// overridden on the command line. +static FunctionPass *useDefaultRegisterAllocator() { return 0; } +static RegisterRegAlloc +defaultRegAlloc("default", + "pick register allocator based on -O option", + useDefaultRegisterAllocator); + +/// -regalloc=... command line option. +static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, + RegisterPassParser<RegisterRegAlloc> > +RegAlloc("regalloc", + cl::init(&useDefaultRegisterAllocator), + cl::desc("Register allocator to use")); + + +/// Instantiate the default register allocator pass for this target for either +/// the optimized or unoptimized allocation path. This will be added to the pass +/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc +/// in the optimized case. 
+/// +/// A target that uses the standard regalloc pass order for fast or optimized +/// allocation may still override this for per-target regalloc +/// selection. But -regalloc=... always takes precedence. +FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) { + if (Optimized) + return createGreedyRegisterAllocator(); + else + return createFastRegisterAllocator(); +} + +/// Find and instantiate the register allocation pass requested by this target +/// at the current optimization level. Different register allocators are +/// defined as separate passes because they may require different analysis. +/// +/// This helper ensures that the regalloc= option is always available, +/// even for targets that override the default allocator. +/// +/// FIXME: When MachinePassRegistry register pass IDs instead of function ptrs, +/// this can be folded into addPass. +FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { + RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); + + // Initialize the global default. + if (!Ctor) { + Ctor = RegAlloc; + RegisterRegAlloc::setDefault(RegAlloc); + } + if (Ctor != useDefaultRegisterAllocator) + return Ctor(); + + // With no -regalloc= override, ask the target for a regalloc pass. + return createTargetRegisterAllocator(Optimized); +} + +/// Add the minimum set of target-independent passes that are required for +/// register allocation. No coalescing or scheduling. +void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { + addPass(&PHIEliminationID); + addPass(&TwoAddressInstructionPassID); + + addPass(RegAllocPass); + printAndVerify("After Register Allocation"); +} + +/// Add standard target-independent passes that are tightly coupled with +/// optimized register allocation, including coalescing, machine instruction +/// scheduling, and register allocation itself. +void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + addPass(&ProcessImplicitDefsID); + + // LiveVariables currently requires pure SSA form. + // + // FIXME: Once TwoAddressInstruction pass no longer uses kill flags, + // LiveVariables can be removed completely, and LiveIntervals can be directly + // computed. (We still either need to regenerate kill flags after regalloc, or + // preferably fix the scavenger to not depend on them). + addPass(&LiveVariablesID); + + // Add passes that move from transformed SSA into conventional SSA. This is a + // "copy coalescing" problem. + // + if (!EnableStrongPHIElim) { + // Edge splitting is smarter with machine loop info. + addPass(&MachineLoopInfoID); + addPass(&PHIEliminationID); + } + + // Eventually, we want to run LiveIntervals before PHI elimination. + if (EarlyLiveIntervals) + addPass(&LiveIntervalsID); + + addPass(&TwoAddressInstructionPassID); + + if (EnableStrongPHIElim) + addPass(&StrongPHIEliminationID); + + addPass(&RegisterCoalescerID); + + // PreRA instruction scheduling. + if (addPass(&MachineSchedulerID)) + printAndVerify("After Machine Scheduling"); + + // Add the selected register allocation pass. + addPass(RegAllocPass); + printAndVerify("After Register Allocation, before rewriter"); + + // Allow targets to change the register assignments before rewriting. + if (addPreRewrite()) + printAndVerify("After pre-rewrite passes"); + + // Finally rewrite virtual registers. 
+  addPass(&VirtRegRewriterID);
+  printAndVerify("After Virtual Register Rewriter");
+
+  // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
+  // but eventually, all users of it should probably be moved to addPostRA and
+  // it can go away. Currently, it's the intended place for targets to run
+  // FinalizeMachineBundles, because passes other than MachineScheduling and
+  // RegAlloc itself may not be aware of bundles.
+  if (addFinalizeRegAlloc())
+    printAndVerify("After RegAlloc finalization");
+
+  // Perform stack slot coloring and post-ra machine LICM.
+  //
+  // FIXME: Re-enable register coloring when it's capable of adding
+  // kill markers.
+  addPass(&StackSlotColoringID);
+
+  // Run post-ra machine LICM to hoist reloads / remats.
+  //
+  // FIXME: can this move into MachineLateOptimization?
+  addPass(&PostRAMachineLICMID);
+
+  printAndVerify("After StackSlotColoring and postra Machine LICM");
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+void TargetPassConfig::addMachineLateOptimization() {
+  // Branch folding must be run after regalloc and prolog/epilog insertion.
+  if (addPass(&BranchFolderPassID))
+    printAndVerify("After BranchFolding");
+
+  // Tail duplication.
+  if (addPass(&TailDuplicateID))
+    printAndVerify("After TailDuplicate");
+
+  // Copy propagation.
+  if (addPass(&MachineCopyPropagationID))
+    printAndVerify("After copy propagation pass");
+}
+
+/// Add standard GC passes.
+bool TargetPassConfig::addGCPasses() {
+  addPass(&GCMachineCodeAnalysisID);
+  return true;
+}
+
+/// Add standard basic block placement passes.
+void TargetPassConfig::addBlockPlacement() {
+  if (addPass(&MachineBlockPlacementID)) {
+    // Run a separate pass to collect block placement statistics.
+    if (EnableBlockPlacementStats)
+      addPass(&MachineBlockPlacementStatsID);
+
+    printAndVerify("After machine block placement.");
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 0000000..a7439b5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,577 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+// Optimization of sign / zero extension instructions. It may be extended to
+// handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This optimization
+// will replace some uses of the pre-extension value with uses of the
+// sub-register of the result.
+//
+// - Optimize Comparisons
+//
+// Optimization of comparison instructions. For instance, in this code:
+//
+// sub r1, 1
+// cmp r1, 0
+// bz L1
+//
+// If the "sub" instruction already sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
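+//
+// For example, on an ARM-like target the sequence above would become:
+//
+// subs r1, 1   ; flag-setting sub, updates the condition flags
+// bz   L1      ; the explicit cmp is gone
+//
+// (illustrative only; the exact mnemonics depend on the target).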
+// +// Another instance, in this code: +// +// sub r1, r3 | sub r1, imm +// cmp r3, r1 or cmp r1, r3 | cmp r1, imm +// bge L1 +// +// If the branch instruction can use flag from "sub", then we can replace +// "sub" with "subs" and eliminate the "cmp" instruction. +// +// - Optimize Bitcast pairs: +// +// v1 = bitcast v0 +// v2 = bitcast v1 +// = v2 +// => +// v1 = bitcast v0 +// = v0 +// +// - Optimize Loads: +// +// Loads that can be folded into a later instruction. A load is foldable +// if it loads to virtual registers and the virtual register defined has +// a single use. +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "peephole-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +// Optimize Extensions +static cl::opt<bool> +Aggressive("aggressive-ext-opt", cl::Hidden, + cl::desc("Aggressive extension optimization")); + +static cl::opt<bool> +DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), + cl::desc("Disable the peephole optimizer")); + +STATISTIC(NumReuse, "Number of extension results reused"); +STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); +STATISTIC(NumCmps, "Number of compares eliminated"); +STATISTIC(NumImmFold, "Number of move immediate folded"); +STATISTIC(NumLoadFold, "Number of loads folded"); +STATISTIC(NumSelects, "Number of selects optimized"); + +namespace { + class PeepholeOptimizer : public MachineFunctionPass { + const TargetMachine *TM; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + MachineDominatorTree *DT; // Machine dominator tree + + public: + static char ID; // Pass identification + PeepholeOptimizer() : MachineFunctionPass(ID) { + initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + if (Aggressive) { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + } + + private: + bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB); + bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); + bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, + SmallPtrSet<MachineInstr*, 8> &LocalMIs); + bool optimizeSelect(MachineInstr *MI); + bool isMoveImmediate(MachineInstr *MI, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg); + }; +} + +char PeepholeOptimizer::ID = 0; +char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; +INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", + "Peephole Optimizations", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", + "Peephole Optimizations", false, false) + +/// optimizeExtInstr - If instruction is a copy-like 
instruction, i.e. it reads +/// a single register and writes a single register and it does not modify the +/// source, and if the source value is preserved as a sub-register of the +/// result, then replace all reachable uses of the source with the subreg of the +/// result. +/// +/// Do not generate an EXTRACT that is used only in a debug use, as this changes +/// the code. Since this code does not currently share EXTRACTs, just ignore all +/// debug uses. +bool PeepholeOptimizer:: +optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, + SmallPtrSet<MachineInstr*, 8> &LocalMIs) { + unsigned SrcReg, DstReg, SubIdx; + if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) + return false; + + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) + return false; + + if (MRI->hasOneNonDBGUse(SrcReg)) + // No other uses. + return false; + + // Ensure DstReg can get a register class that actually supports + // sub-registers. Don't change the class until we commit. + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx); + if (!DstRC) + return false; + + // The ext instr may be operating on a sub-register of SrcReg as well. + // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit + // register. + // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of + // SrcReg:SubIdx should be replaced. + bool UseSrcSubIdx = TM->getRegisterInfo()-> + getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0; + + // The source has other uses. See if we can replace the other uses with use of + // the result of the extension. + SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); + UI != UE; ++UI) + ReachedBBs.insert(UI->getParent()); + + // Uses that are in the same BB of uses of the result of the instruction. + SmallVector<MachineOperand*, 8> Uses; + + // Uses that the result of the instruction can reach. + SmallVector<MachineOperand*, 8> ExtendedUses; + + bool ExtendLife = true; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); + UI != UE; ++UI) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + if (UseMI == MI) + continue; + + if (UseMI->isPHI()) { + ExtendLife = false; + continue; + } + + // Only accept uses of SrcReg:SubIdx. + if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) + continue; + + // It's an error to translate this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 + // + // into this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1027 = COPY %reg1025:4 + // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 + // + // The problem here is that SUBREG_TO_REG is there to assert that an + // implicit zext occurs. It doesn't insert a zext instruction. If we allow + // the COPY here, it will give us the value after the <sext>, not the + // original value of %reg1024 before <sext>. + if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) + continue; + + MachineBasicBlock *UseMBB = UseMI->getParent(); + if (UseMBB == MBB) { + // Local uses that come after the extension. + if (!LocalMIs.count(UseMI)) + Uses.push_back(&UseMO); + } else if (ReachedBBs.count(UseMBB)) { + // Non-local uses where the result of the extension is used. Always + // replace these unless it's a PHI. 
+        Uses.push_back(&UseMO);
+    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+      // We may want to extend the live range of the extension result in order
+      // to replace these uses.
+      ExtendedUses.push_back(&UseMO);
+    } else {
+      // Both will be live out of the def MBB anyway. Don't extend the live
+      // range of the extension result.
+      ExtendLife = false;
+      break;
+    }
+  }
+
+  if (ExtendLife && !ExtendedUses.empty())
+    // Extend the liveness of the extension result.
+    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+              std::back_inserter(Uses));
+
+  // Now replace all uses.
+  bool Changed = false;
+  if (!Uses.empty()) {
+    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+    // Look for PHI uses of the extended result; we don't want to extend the
+    // liveness of a PHI input. It breaks all kinds of assumptions downstream.
+    // A PHI use is expected to be the kill of its source values.
+    for (MachineRegisterInfo::use_nodbg_iterator
+           UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
+         UI != UE; ++UI)
+      if (UI->isPHI())
+        PHIBBs.insert(UI->getParent());
+
+    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+      MachineOperand *UseMO = Uses[i];
+      MachineInstr *UseMI = UseMO->getParent();
+      MachineBasicBlock *UseMBB = UseMI->getParent();
+      if (PHIBBs.count(UseMBB))
+        continue;
+
+      // About to add uses of DstReg, clear DstReg's kill flags.
+      if (!Changed) {
+        MRI->clearKillFlags(DstReg);
+        MRI->constrainRegClass(DstReg, DstRC);
+      }
+
+      unsigned NewVR = MRI->createVirtualRegister(RC);
+      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+                                   TII->get(TargetOpcode::COPY), NewVR)
+        .addReg(DstReg, 0, SubIdx);
+      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
+      if (UseSrcSubIdx) {
+        Copy->getOperand(0).setSubReg(SubIdx);
+        Copy->getOperand(0).setIsUndef();
+      }
+      UseMO->setReg(NewVR);
+      ++NumReuse;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that
+/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcasts a
+/// value across register classes), and the source is defined by another
+/// bitcast instruction B, and the register class of the source of B matches
+/// the register class of the def of A, then it is legal to replace all uses
+/// of the def of A with the source of B. e.g.
+///   %vreg0<def> = VMOVSR %vreg1
+///   %vreg3<def> = VMOVRS %vreg0
+///   Replace all uses of vreg3 with vreg1.
+bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI,
+                                             MachineBasicBlock *MBB) {
+  unsigned NumDefs = MI->getDesc().getNumDefs();
+  unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
+  if (NumDefs != 1)
+    return false;
+
+  unsigned Def = 0;
+  unsigned Src = 0;
+  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isDef())
+      Def = Reg;
+    else if (Src)
+      // Multiple sources?
+      return false;
+    else
+      Src = Reg;
+  }
+
+  assert(Def && Src && "Malformed bitcast instruction!");
+
+  MachineInstr *DefMI = MRI->getVRegDef(Src);
+  if (!DefMI || !DefMI->isBitcast())
+    return false;
+
+  unsigned SrcSrc = 0;
+  NumDefs = DefMI->getDesc().getNumDefs();
+  NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
+  if (NumDefs != 1)
+    return false;
+  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
+    const MachineOperand &MO = DefMI->getOperand(i);
+    if (!MO.isReg() || MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (!MO.isDef()) {
+      if (SrcSrc)
+        // Multiple sources?
+        return false;
+      else
+        SrcSrc = Reg;
+    }
+  }
+
+  if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
+    return false;
+
+  MRI->replaceRegWith(Def, SrcSrc);
+  MRI->clearKillFlags(SrcSrc);
+  MI->eraseFromParent();
+  ++NumBitcasts;
+  return true;
+}
+
+/// optimizeCmpInstr - If the instruction is a compare and the previous
+/// instruction it's comparing against already sets (or could be modified to
+/// set) the same flag as the compare, then we can remove the comparison and
+/// use the flag from the previous instruction.
+bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
+                                         MachineBasicBlock *MBB) {
+  // If this instruction is a comparison against zero and isn't comparing a
+  // physical register, we can try to optimize it.
+  unsigned SrcReg, SrcReg2;
+  int CmpMask, CmpValue;
+  if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
+      TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+      (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
+    return false;
+
+  // Attempt to optimize the comparison instruction.
+  if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
+    ++NumCmps;
+    return true;
+  }
+
+  return false;
+}
+
+/// Optimize a select instruction.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
+  unsigned TrueOp = 0;
+  unsigned FalseOp = 0;
+  bool Optimizable = false;
+  SmallVector<MachineOperand, 4> Cond;
+  if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+    return false;
+  if (!Optimizable)
+    return false;
+  if (!TII->optimizeSelect(MI))
+    return false;
+  MI->eraseFromParent();
+  ++NumSelects;
+  return true;
+}
+
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads that define a virtual register, and only
+/// when that virtual register has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+                                       unsigned &FoldAsLoadDefReg) {
+  if (!MI->canFoldAsLoad() || !MI->mayLoad())
+    return false;
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MCID.getNumDefs() != 1)
+    return false;
+
+  unsigned Reg = MI->getOperand(0).getReg();
+  // To reduce compilation time, we check MRI->hasOneUse when inserting
+  // loads. It should be checked when processing uses of the load, since
+  // uses can be removed during peephole.
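+  // For instance, on x86 a single-use load feeding a compare, roughly
+  //
+  //   %vreg0 = MOV32rm <mem>        ; load, %vreg0 has one use
+  //   CMP32rr %vreg1, %vreg0        ; implicitly defines EFLAGS
+  //
+  // can later be folded by optimizeLoadInstr into one memory-operand
+  // instruction, CMP32rm %vreg1, <mem> (sketch only; opcodes illustrative).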
+ if (!MI->getOperand(0).getSubReg() && + TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->hasOneUse(Reg)) { + FoldAsLoadDefReg = Reg; + return true; + } + return false; +} + +bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { + const MCInstrDesc &MCID = MI->getDesc(); + if (!MI->isMoveImmediate()) + return false; + if (MCID.getNumDefs() != 1) + return false; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + ImmDefMIs.insert(std::make_pair(Reg, MI)); + ImmDefRegs.insert(Reg); + return true; + } + + return false; +} + +/// foldImmediate - Try folding register operands that are defined by move +/// immediate instructions, i.e. a trivial constant folding optimization, if +/// and only if the def and use are in the same BB. +bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (ImmDefRegs.count(Reg) == 0) + continue; + DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); + assert(II != ImmDefMIs.end()); + if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { + ++NumImmFold; + return true; + } + } + return false; +} + +bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); + DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); + + if (DisablePeephole) + return false; + + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + MRI = &MF.getRegInfo(); + DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0; + + bool Changed = false; + + SmallPtrSet<MachineInstr*, 8> LocalMIs; + SmallSet<unsigned, 4> ImmDefRegs; + DenseMap<unsigned, MachineInstr*> ImmDefMIs; + unsigned FoldAsLoadDefReg; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + bool SeenMoveImm = false; + LocalMIs.clear(); + ImmDefRegs.clear(); + ImmDefMIs.clear(); + FoldAsLoadDefReg = 0; + + for (MachineBasicBlock::iterator + MII = I->begin(), MIE = I->end(); MII != MIE; ) { + MachineInstr *MI = &*MII; + // We may be erasing MI below, increment MII now. + ++MII; + LocalMIs.insert(MI); + + // If there exists an instruction which belongs to the following + // categories, we will discard the load candidate. + if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || + MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || + MI->hasUnmodeledSideEffects()) { + FoldAsLoadDefReg = 0; + continue; + } + if (MI->mayStore() || MI->isCall()) + FoldAsLoadDefReg = 0; + + if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) || + (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || + (MI->isSelect() && optimizeSelect(MI))) { + // MI is deleted. + LocalMIs.erase(MI); + Changed = true; + continue; + } + + if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { + SeenMoveImm = true; + } else { + Changed |= optimizeExtInstr(MI, MBB, LocalMIs); + // optimizeExtInstr might have created new instructions after MI + // and before the already incremented MII. Adjust MII so that the + // next iteration sees the new instructions. 
+ MII = MI; + ++MII; + if (SeenMoveImm) + Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); + } + + // Check whether MI is a load candidate for folding into a later + // instruction. If MI is not a candidate, check whether we can fold an + // earlier load into MI. + if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { + // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr + // can enable folding by converting SUB to CMP. + MachineInstr *DefMI = 0; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. + DEBUG(dbgs() << "Replacing: " << *MI); + DEBUG(dbgs() << " With: " << *FoldMI); + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + ++NumLoadFold; + + // MI is replaced with FoldMI. + Changed = true; + continue; + } + } + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp new file mode 100644 index 0000000..53fe273 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -0,0 +1,776 @@ +//===----- SchedulePostRAList.cpp - list scheduler ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. 
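+//
+// In outline, the main loop (ListScheduleTopDown below) looks like:
+//
+//   while (Available or Pending nonempty):
+//     move Pending nodes whose depth <= CurCycle into Available
+//     pick the highest-priority Available node with no hazard
+//     if found: schedule it and release its successors
+//     else:     advance the cycle (a stall) or emit a noop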
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "llvm/CodeGen/Passes.h"
+#include "AggressiveAntiDepBreaker.h"
+#include "AntiDepBreaker.h"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
+
+// Post-RA scheduling is enabled with
+// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
+// override the target.
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+                      cl::desc("Enable scheduling after register allocation"),
+                      cl::init(false), cl::Hidden);
+static cl::opt<std::string>
+EnableAntiDepBreaking("break-anti-dependencies",
+                      cl::desc("Break post-RA scheduling anti-dependencies: "
+                               "\"critical\", \"all\", or \"none\""),
+                      cl::init("none"), cl::Hidden);
+
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+         cl::desc("Debug control MBBs that are scheduled"),
+         cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+         cl::desc("Debug control MBBs that are scheduled"),
+         cl::init(0), cl::Hidden);
+
+AntiDepBreaker::~AntiDepBreaker() { }
+
+namespace {
+  class PostRAScheduler : public MachineFunctionPass {
+    const TargetInstrInfo *TII;
+    RegisterClassInfo RegClassInfo;
+
+  public:
+    static char ID;
+    PostRAScheduler() : MachineFunctionPass(ID) {}
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<TargetPassConfig>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    bool runOnMachineFunction(MachineFunction &Fn);
+  };
+  char PostRAScheduler::ID = 0;
+
+  class SchedulePostRATDList : public ScheduleDAGInstrs {
+    /// AvailableQueue - The priority queue to use for the available SUnits.
+    ///
+    LatencyPriorityQueue AvailableQueue;
+
+    /// PendingQueue - This contains all of the instructions whose operands have
+    /// been issued, but their results are not ready yet (due to the latency of
+    /// the operation). Once the operands become available, the instruction is
+    /// added to the AvailableQueue.
+    std::vector<SUnit*> PendingQueue;
+
+    /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec; + + /// AntiDepBreak - Anti-dependence breaking object, or NULL if none + AntiDepBreaker *AntiDepBreak; + + /// AA - AliasAnalysis for making memory reference queries. + AliasAnalysis *AA; + + /// LiveRegs - true if the register is live. + BitVector LiveRegs; + + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + + public: + SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, const RegisterClassInfo&, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs); + + ~SchedulePostRATDList(); + + /// startBlock - Initialize register live-range state for scheduling in + /// this block. + /// + void startBlock(MachineBasicBlock *BB); + + /// Initialize the scheduler state for the next scheduling region. + virtual void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Notify that the scheduler has finished scheduling the current region. + virtual void exitRegion(); + + /// Schedule - Schedule the instruction range using list scheduling. + /// + void schedule(); + + void EmitSchedule(); + + /// Observe - Update liveness information to account for the current + /// instruction, which will not be scheduled. + /// + void Observe(MachineInstr *MI, unsigned Count); + + /// finishBlock - Clean up register live-range state. + /// + void finishBlock(); + + /// FixupKills - Fix register kill flags that have been made + /// invalid due to scheduling + /// + void FixupKills(MachineBasicBlock *MBB); + + private: + void ReleaseSucc(SUnit *SU, SDep *SuccEdge); + void ReleaseSuccessors(SUnit *SU); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); + void StartBlockForKills(MachineBasicBlock *BB); + + // ToggleKillFlag - Toggle a register operand kill flag. Other + // adjustments may be made to the instruction if necessary. Return + // true if the operand has been deleted, false if not. + bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); + + void dumpSchedule() const; + }; +} + +char &llvm::PostRASchedulerID = PostRAScheduler::ID; + +INITIALIZE_PASS(PostRAScheduler, "post-RA-sched", + "Post RA top-down list latency scheduler", false, false) + +SchedulePostRATDList::SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, const RegisterClassInfo &RCI, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs) + : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), + LiveRegs(TRI->getNumRegs()) +{ + const TargetMachine &TM = MF.getTarget(); + const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); + HazardRec = + TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); + + assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE || + MRI.tracksLiveness()) && + "Live-ins must be accurate for anti-dependency breaking"); + AntiDepBreak = + ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ? + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : + ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ? 
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL)); +} + +SchedulePostRATDList::~SchedulePostRATDList() { + delete HazardRec; + delete AntiDepBreak; +} + +/// Initialize state associated with the next scheduling region. +void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + Sequence.clear(); +} + +/// Print the schedule before exiting the region. +void SchedulePostRATDList::exitRegion() { + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); + ScheduleDAGInstrs::exitRegion(); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +/// dumpSchedule - dump the scheduled Sequence. +void SchedulePostRATDList::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} +#endif + +bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { + TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + + RegClassInfo.runOnMachineFunction(Fn); + + // Check for explicit enable/disable of post-ra scheduling. + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = + TargetSubtargetInfo::ANTIDEP_NONE; + SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs; + if (EnablePostRAScheduler.getPosition() > 0) { + if (!EnablePostRAScheduler) + return false; + } else { + // Check that post-RA scheduling is enabled for this target. + // This may upgrade the AntiDepMode. + const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>(); + if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode, + CriticalPathRCs)) + return false; + } + + // Check for antidep breaking override... + if (EnableAntiDepBreaking.getPosition() > 0) { + AntiDepMode = (EnableAntiDepBreaking == "all") + ? TargetSubtargetInfo::ANTIDEP_ALL + : ((EnableAntiDepBreaking == "critical") + ? TargetSubtargetInfo::ANTIDEP_CRITICAL + : TargetSubtargetInfo::ANTIDEP_NONE); + } + + DEBUG(dbgs() << "PostRAScheduler\n"); + + SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode, + CriticalPathRCs); + + // Loop over all of the basic blocks + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { +#ifndef NDEBUG + // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod + if (DebugDiv > 0) { + static int bbcnt = 0; + if (bbcnt++ % DebugDiv != DebugMod) + continue; + dbgs() << "*** DEBUG scheduling " << Fn.getName() + << ":BB#" << MBB->getNumber() << " ***\n"; + } +#endif + + // Initialize register live-range state for scheduling in this block. + Scheduler.startBlock(MBB); + + // Schedule each sequence of instructions not interrupted by a label + // or anything else that effectively needs to shut down scheduling. 
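+    // For example, a block of the form
+    //
+    //   I0  I1  CALL  I2  I3
+    //
+    // is split at the CALL: the bottom-up walk below first schedules the
+    // region [I2, I3], then [I0, I1]; the CALL itself stays in place and is
+    // only Observe()'d as a boundary.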
+ MachineBasicBlock::iterator Current = MBB->end(); + unsigned Count = MBB->size(), CurrentCount = Count; + for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { + MachineInstr *MI = llvm::prior(I); + // Calls are not scheduling boundaries before register allocation, but + // post-ra we don't gain anything by scheduling across calls since we + // don't need to worry about register pressure. + if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { + Scheduler.enterRegion(MBB, I, Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); + Scheduler.EmitSchedule(); + Current = MI; + CurrentCount = Count - 1; + Scheduler.Observe(MI, CurrentCount); + } + I = MI; + --Count; + if (MI->isBundle()) + Count -= MI->getBundleSize(); + } + assert(Count == 0 && "Instruction count mismatch!"); + assert((MBB->begin() == Current || CurrentCount != 0) && + "Instruction count mismatch!"); + Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); + Scheduler.EmitSchedule(); + + // Clean up register live-range state. + Scheduler.finishBlock(); + + // Update register kills + Scheduler.FixupKills(MBB); + } + + return true; +} + +/// StartBlock - Initialize register live-range state for scheduling in +/// this block. +/// +void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { + // Call the superclass. + ScheduleDAGInstrs::startBlock(BB); + + // Reset the hazard recognizer and anti-dep breaker. + HazardRec->Reset(); + if (AntiDepBreak != NULL) + AntiDepBreak->StartBlock(BB); +} + +/// Schedule - Schedule the instruction range using list scheduling. +/// +void SchedulePostRATDList::schedule() { + // Build the scheduling graph. + buildSchedGraph(AA); + + if (AntiDepBreak != NULL) { + unsigned Broken = + AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, + EndIndex, DbgValues); + + if (Broken != 0) { + // We made changes. Update the dependency graph. + // Theoretically we could update the graph in place: + // When a live range is changed to use a different register, remove + // the def's anti-dependence *and* output-dependence edges due to + // that register, and add new anti-dependence and output-dependence + // edges based on the next live range of the register. + ScheduleDAG::clearDAG(); + buildSchedGraph(AA); + + NumFixedAnti += Broken; + } + } + + DEBUG(dbgs() << "********** List Scheduling **********\n"); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + AvailableQueue.initNodes(SUnits); + ListScheduleTopDown(); + AvailableQueue.releaseState(); +} + +/// Observe - Update liveness information to account for the current +/// instruction, which will not be scheduled. +/// +void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { + if (AntiDepBreak != NULL) + AntiDepBreak->Observe(MI, Count, EndIndex); +} + +/// FinishBlock - Clean up register live-range state. +/// +void SchedulePostRATDList::finishBlock() { + if (AntiDepBreak != NULL) + AntiDepBreak->FinishBlock(); + + // Call the superclass. + ScheduleDAGInstrs::finishBlock(); +} + +/// StartBlockForKills - Initialize register live-range state for updating kills +/// +void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { + // Start with no live registers. + LiveRegs.reset(); + + // Examine the live-in regs of all successors. 
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + LiveRegs.set(Reg); + // Repeat, for all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + +bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, + MachineOperand &MO) { + // Setting kill flag... + if (!MO.isKill()) { + MO.setIsKill(true); + return false; + } + + // If MO itself is live, clear the kill flag... + if (LiveRegs.test(MO.getReg())) { + MO.setIsKill(false); + return false; + } + + // If any subreg of MO is live, then create an imp-def for that + // subreg and keep MO marked as killed. + MO.setIsKill(false); + bool AllDead = true; + const unsigned SuperReg = MO.getReg(); + MachineInstrBuilder MIB(MF, MI); + for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + MIB.addReg(*SubRegs, RegState::ImplicitDefine); + AllDead = false; + } + } + + if(AllDead) + MO.setIsKill(true); + return false; +} + +/// FixupKills - Fix the register kill flags, they may have been made +/// incorrect by instruction reordering. +/// +void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); + + BitVector killedRegs(TRI->getNumRegs()); + + StartBlockForKills(MBB); + + // Examine block from end to start... + unsigned Count = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); + I != E; --Count) { + MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + + // Update liveness. Registers that are defed but not used in this + // instruction are now dead. Mark register and all subregs as they + // are completely defined. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + LiveRegs.clearBitsNotInMask(MO.getRegMask()); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + LiveRegs.reset(Reg); + + // Repeat for all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.reset(*SubRegs); + } + + // Examine all used registers and set/clear kill flag. When a + // register is used multiple times we only set the kill flag on + // the first use. + killedRegs.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + bool kill = false; + if (!killedRegs.test(Reg)) { + kill = true; + // A register is not killed if any subregs are live... + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + kill = false; + break; + } + } + + // If subreg is not live, then register is killed if it became + // live in this instruction + if (kill) + kill = !LiveRegs.test(Reg); + } + + if (MO.isKill() != kill) { + DEBUG(dbgs() << "Fixing " << MO << " in "); + // Warning: ToggleKillFlag may invalidate MO. + ToggleKillFlag(MI, MO); + DEBUG(MI->dump()); + } + + killedRegs.set(Reg); + } + + // Mark any used register (that is not using undef) and subregs as + // now live... 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + LiveRegs.set(Reg); + + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. +void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + + if (SuccEdge->isWeak()) { + --SuccSU->WeakPredsLeft; + return; + } +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + + // Standard scheduler algorithms will recompute the depth of the successor + // here as such: + // SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); + // + // However, we lazily compute node depth instead. Note that + // ScheduleNodeTopDown has already updated the depth of this node which causes + // all descendents to be marked dirty. Setting the successor depth explicitly + // here would cause depth to be recomputed for all its ancestors. If the + // successor is not yet ready (because of a transitively redundant edge) then + // this causes depth computation to be quadratic in the size of the DAG. + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + PendingQueue.push_back(SuccSU); +} + +/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors. +void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + ReleaseSucc(SU, &*I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && + "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue.scheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void SchedulePostRATDList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // We're scheduling top-down but we're visiting the regions in + // bottom-up order, so we don't know the hazards at the start of a + // region. So assume no hazards (this should usually be ok as most + // blocks are a single region). + HazardRec->Reset(); + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // Add all leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. 
+ if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) { + AvailableQueue.push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // In any cycle where we can't schedule any instructions, we must + // stall or emit a noop, depending on the target. + bool CycleHasInsts = false; + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue.empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + unsigned MinDepth = ~0u; + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() <= CurCycle) { + AvailableQueue.push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } else if (PendingQueue[i]->getDepth() < MinDepth) + MinDepth = PendingQueue[i]->getDepth(); + } + + DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); + + SUnit *FoundSUnit = 0; + bool HasNoopHazards = false; + while (!AvailableQueue.empty()) { + SUnit *CurSUnit = AvailableQueue.pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue.push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule... + if (FoundSUnit) { + // ... schedule the node... + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + CycleHasInsts = true; + if (HazardRec->atIssueLimit()) { + DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + ++CurCycle; + CycleHasInsts = false; + } + } else { + if (CycleHasInsts) { + DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, + // just advance the current cycle and try again. + DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + ++NumStalls; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + } + + ++CurCycle; + CycleHasInsts = false; + } + } + +#ifndef NDEBUG + unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +#endif // NDEBUG +} + +// EmitSchedule - Emit the machine code in scheduled order. +void SchedulePostRATDList::EmitSchedule() { + RegionBegin = RegionEnd; + + // If first instruction was a DBG_VALUE then put it back. 
+  if (FirstDbgValue)
+    BB->splice(RegionEnd, BB, FirstDbgValue);
+
+  // Then re-insert them according to the given schedule.
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i])
+      BB->splice(RegionEnd, BB, SU->getInstr());
+    else
+      // Null SUnit* is a noop.
+      TII->insertNoop(*BB, RegionEnd);
+
+    // Update the Begin iterator, as the first instruction in the block
+    // may have been scheduled later.
+    if (i == 0)
+      RegionBegin = prior(RegionEnd);
+  }
+
+  // Reinsert any remaining debug_values.
+  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+    MachineInstr *DbgValue = P.first;
+    MachineBasicBlock::iterator OrigPrevMI = P.second;
+    BB->splice(++OrigPrevMI, BB, DbgValue);
+  }
+  DbgValues.clear();
+  FirstDbgValue = NULL;
+}
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
new file mode 100644
index 0000000..e4e18c3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -0,0 +1,170 @@
+//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "processimplicitdefs"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
+/// for each use. Add isUndef marker to implicit_def defs and their uses.
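+/// For example, once all reads of a COPY's source are marked <undef>, the
+/// COPY no longer reads anything and can itself be rewritten into an
+/// IMPLICIT_DEF and processed again (see canTurnIntoImplicitDef below).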
+class ProcessImplicitDefs : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  MachineRegisterInfo *MRI;
+
+  SmallSetVector<MachineInstr*, 16> WorkList;
+
+  void processImplicitDef(MachineInstr *MI);
+  bool canTurnIntoImplicitDef(MachineInstr *MI);
+
+public:
+  static char ID;
+
+  ProcessImplicitDefs() : MachineFunctionPass(ID) {
+    initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+};
+} // end anonymous namespace
+
+char ProcessImplicitDefs::ID = 0;
+char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
+
+INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
+                      "Process Implicit Definitions", false, false)
+INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+                    "Process Implicit Definitions", false, false)
+
+void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addPreserved<AliasAnalysis>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
+  if (!MI->isCopyLike() &&
+      !MI->isInsertSubreg() &&
+      !MI->isRegSequence() &&
+      !MI->isPHI())
+    return false;
+  for (MIOperands MO(MI); MO.isValid(); ++MO)
+    if (MO->isReg() && MO->isUse() && MO->readsReg())
+      return false;
+  return true;
+}
+
+void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
+  DEBUG(dbgs() << "Processing " << *MI);
+  unsigned Reg = MI->getOperand(0).getReg();
+
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    // For virtual registers, mark all uses as <undef>, and convert users to
+    // implicit-def when possible.
+    for (MachineRegisterInfo::use_nodbg_iterator UI =
+           MRI->use_nodbg_begin(Reg),
+           UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+      MachineOperand &MO = UI.getOperand();
+      MO.setIsUndef();
+      MachineInstr *UserMI = MO.getParent();
+      if (!canTurnIntoImplicitDef(UserMI))
+        continue;
+      DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI);
+      UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+      WorkList.insert(UserMI);
+    }
+    MI->eraseFromParent();
+    return;
+  }
+
+  // This is a physreg implicit-def.
+  // Look for the first instruction to use or define an alias.
+  MachineBasicBlock::instr_iterator UserMI = MI;
+  MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
+  bool Found = false;
+  for (++UserMI; UserMI != UserE; ++UserMI) {
+    for (MIOperands MO(UserMI); MO.isValid(); ++MO) {
+      if (!MO->isReg())
+        continue;
+      unsigned UserReg = MO->getReg();
+      if (!TargetRegisterInfo::isPhysicalRegister(UserReg) ||
+          !TRI->regsOverlap(Reg, UserReg))
+        continue;
+      // UserMI uses or redefines Reg. Set <undef> flags on all uses.
+      Found = true;
+      if (MO->isUse())
+        MO->setIsUndef();
+    }
+    if (Found)
+      break;
+  }
+
+  // If we found the using MI, we can erase the IMPLICIT_DEF.
+  if (Found) {
+    DEBUG(dbgs() << "Physreg user: " << *UserMI);
+    MI->eraseFromParent();
+    return;
+  }
+
+  // Using instr wasn't found, it could be in another block.
+  // Leave the physreg IMPLICIT_DEF, but trim any extra operands.
+  for (unsigned i = MI->getNumOperands() - 1; i; --i)
+    MI->RemoveOperand(i);
+  DEBUG(dbgs() << "Keeping physreg: " << *MI);
+}
+
+/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into
+/// <undef> operands.
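+/// Note that this pass runs on machine SSA form; runOnMachineFunction
+/// asserts MRI->isSSA() below.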
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
+
+  DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+               << "********** Function: " << MF.getName() << '\n');
+
+  bool Changed = false;
+
+  TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
+  assert(WorkList.empty() && "Inconsistent worklist state");
+
+  for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+       MFI != MFE; ++MFI) {
+    // Scan the basic block for implicit defs.
+    for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(),
+         MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI)
+      if (MBBI->isImplicitDef())
+        WorkList.insert(MBBI);
+
+    if (WorkList.empty())
+      continue;
+
+    DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size()
+                 << " implicit defs.\n");
+    Changed = true;
+
+    // Drain the WorkList to recursively process any new implicit defs.
+    do processImplicitDef(WorkList.pop_back_val());
+    while (!WorkList.empty());
+  }
+  return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 0000000..e5872df
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,892 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass provides an optional shrink wrapping variant of prolog/epilog
+// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
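+// (For example, the variant would be exercised by passing --shrink-wrap on
+// the llc command line; the default placement keeps all spills in the entry
+// block and all restores in the return blocks.)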
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pei"
+#include "PrologEpilogInserter.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+
+using namespace llvm;
+
+char PEI::ID = 0;
+char &llvm::PrologEpilogCodeInserterID = PEI::ID;
+
+INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
+                      "Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(PEI, "prologepilog",
+                    "Prologue/Epilogue Insertion & Frame Finalization",
+                    false, false)
+
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+STATISTIC(NumBytesStackSpace,
+          "Number of bytes used for stack in all functions");
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+  const Function* F = Fn.getFunction();
+  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+  const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+
+  assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
+
+  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+  // function's frame information. Also eliminates call frame pseudo
+  // instructions.
+  calculateCallsInformation(Fn);
+
+  // Allow the target machine to make some adjustments to the function
+  // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
+  TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+
+  // Scan the function for modified callee saved registers and insert spill code
+  // for any callee saved registers that are modified.
+  calculateCalleeSavedRegisters(Fn);
+
+  // Determine placement of CSR spill/restore code:
+  // - With shrink wrapping, place spills and restores to tightly
+  //   enclose regions in the Machine CFG of the function where
+  //   they are used.
+  // - Without shrink wrapping (default), place all spills in the
+  //   entry block, all restores in return blocks.
+  placeCSRSpillsAndRestores(Fn);
+
+  // Add the code to save and restore the callee saved registers
+  if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                       Attribute::Naked))
+    insertCSRSpillsAndRestores(Fn);
+
+  // Allow the target machine to make final modifications to the function
+  // before the frame layout is finalized.
+  TFI->processFunctionBeforeFrameFinalized(Fn, RS);
+
+  // Calculate actual frame offsets for all abstract stack objects...
+  calculateFrameObjectOffsets(Fn);
+
+  // Add prolog and epilog code to the function. This function is required
+  // to align the stack frame as necessary for any stack variables or
+  // called functions. Because of this, calculateCalleeSavedRegisters()
+  // must be called before this function in order to set the AdjustsStack
+  // and MaxCallFrameSize variables.
+  if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                       Attribute::Naked))
+    insertPrologEpilogCode(Fn);
+
+  // Replace all MO_FrameIndex operands with physical register references
+  // and actual offsets.
+  //
+  replaceFrameIndices(Fn);
+
+  // If register scavenging is needed, as we've enabled doing it as a
+  // post-pass, scavenge the virtual registers that frame index elimination
+  // inserted.
+  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
+    scavengeFrameVirtualRegs(Fn);
+
+  // Clear any vregs created by virtual scavenging.
+  Fn.getRegInfo().clearVirtRegs();
+
+  delete RS;
+  clearAllSets();
+  return true;
+}
+
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void PEI::calculateCallsInformation(MachineFunction &Fn) {
+  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+  unsigned MaxCallFrameSize = 0;
+  bool AdjustsStack = MFI->adjustsStack();
+
+  // Get the function call frame set-up and tear-down instruction opcode
+  int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+  int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+  // Early exit for targets which have no call frame setup/destroy pseudo
+  // instructions.
+  if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
+    return;
+
+  std::vector<MachineBasicBlock::iterator> FrameSDOps;
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+      if (I->getOpcode() == FrameSetupOpcode ||
+          I->getOpcode() == FrameDestroyOpcode) {
+        assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+               " instructions should have a single immediate argument!");
+        unsigned Size = I->getOperand(0).getImm();
+        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+        AdjustsStack = true;
+        FrameSDOps.push_back(I);
+      } else if (I->isInlineAsm()) {
+        // Some inline asm's need a stack frame, as indicated by operand 1.
+        unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+        if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+          AdjustsStack = true;
+      }
+
+  MFI->setAdjustsStack(AdjustsStack);
+  MFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+  for (std::vector<MachineBasicBlock::iterator>::iterator
+         i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+    MachineBasicBlock::iterator I = *i;
+
+    // If call frames are not being included as part of the stack frame, and
+    // the target doesn't indicate otherwise, remove the call frame pseudos
+    // here. The sub/add sp instruction pairs are still inserted, but we don't
+    // need to track the SP adjustment for frame index elimination.
+    if (TFI->canSimplifyCallFramePseudos(Fn))
+      TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+  }
+}
+
+
+/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
+/// registers.
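+/// Each modified callee saved register is assigned a spill slot (a fixed
+/// one when the target requires it), and MinCSFrameIndex/MaxCSFrameIndex
+/// are updated to bracket the non-fixed slots.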
+void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
+  const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo();
+  const TargetFrameLowering *TFI = F.getTarget().getFrameLowering();
+  MachineFrameInfo *MFI = F.getFrameInfo();
+
+  // Get the callee saved register list...
+  const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+
+  // These are used to keep track of the callee-save area. Initialize them.
+  MinCSFrameIndex = INT_MAX;
+  MaxCSFrameIndex = 0;
+
+  // Early exit for targets which have no callee saved registers.
+  if (CSRegs == 0 || CSRegs[0] == 0)
+    return;
+
+  // In Naked functions we aren't going to save any registers.
+  if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                                    Attribute::Naked))
+    return;
+
+  std::vector<CalleeSavedInfo> CSI;
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned Reg = CSRegs[i];
+    if (F.getRegInfo().isPhysRegUsed(Reg)) {
+      // If the reg is modified, save it!
+      CSI.push_back(CalleeSavedInfo(Reg));
+    }
+  }
+
+  if (CSI.empty())
+    return;   // Early exit if no callee saved registers are modified!
+
+  unsigned NumFixedSpillSlots;
+  const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+    TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+  // Now that we know which registers need to be saved and restored, allocate
+  // stack slots for them.
+  for (std::vector<CalleeSavedInfo>::iterator
+         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+    unsigned Reg = I->getReg();
+    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+    int FrameIdx;
+    if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+      I->setFrameIdx(FrameIdx);
+      continue;
+    }
+
+    // Check to see if this physreg must be spilled to a particular stack slot
+    // on this target.
+    const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
+           FixedSlot->Reg != Reg)
+      ++FixedSlot;
+
+    if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+      // Nope, just spill it anywhere convenient.
+      unsigned Align = RC->getAlignment();
+      unsigned StackAlign = TFI->getStackAlignment();
+
+      // We may not be able to satisfy the desired alignment specification of
+      // the TargetRegisterClass if the stack alignment is smaller. Use the
+      // min.
+      Align = std::min(Align, StackAlign);
+      FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+    } else {
+      // Spill it to the stack where we must.
+      FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
+    }
+
+    I->setFrameIdx(FrameIdx);
+  }
+
+  MFI->setCalleeSavedInfo(CSI);
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function, handling shrink wrapping.
+///
+void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+  // Get callee saved register information.
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+  MFI->setCalleeSavedInfoValid(true);
+
+  // Early exit if no callee saved registers are modified!
+  if (CSI.empty())
+    return;
+
+  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+  MachineBasicBlock::iterator I;
+
+  if (!ShrinkWrapThisFunction) {
+    // Spill using target interface.
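+    // The target may spill all CSRs in one go (e.g. with a push-multiple
+    // instruction); only when it declines do we emit the generic
+    // per-register stores below.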
+ I = EntryBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. + // It's killed at the spill. + EntryBlock->addLiveIn(CSI[i].getReg()); + + // Insert the spill to the stack frame. + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); + } + } + + // Restore using target interface. + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { + MachineBasicBlock* MBB = ReturnBlocks[ri]; + I = MBB->end(); --I; + + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->isTerminator()) + I = I2; + + bool AtStart = I == MBB->begin(); + MachineBasicBlock::iterator BeforeI = I; + if (!AtStart) + --BeforeI; + + // Restore all registers immediately before the return and any + // terminators that precede it. + if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, + CSI[i].getFrameIdx(), + RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } + } + } + } + return; + } + + // Insert spills. + std::vector<CalleeSavedInfo> blockCSI; + for (CSRegBlockMap::iterator BI = CSRSave.begin(), + BE = CSRSave.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet save = BI->second; + + if (save.empty()) + continue; + + blockCSI.clear(); + for (CSRegSet::iterator RI = save.begin(), + RE = save.end(); RI != RE; ++RI) { + blockCSI.push_back(CSI[*RI]); + } + assert(blockCSI.size() > 0 && + "Could not collect callee saved register info"); + + I = MBB->begin(); + + // When shrink wrapping, use stack slot stores/loads. + for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. + // It's killed at the spill. + MBB->addLiveIn(blockCSI[i].getReg()); + + // Insert the spill to the stack frame. + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*MBB, I, Reg, + true, + blockCSI[i].getFrameIdx(), + RC, TRI); + } + } + + for (CSRegBlockMap::iterator BI = CSRRestore.begin(), + BE = CSRRestore.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet restore = BI->second; + + if (restore.empty()) + continue; + + blockCSI.clear(); + for (CSRegSet::iterator RI = restore.begin(), + RE = restore.end(); RI != RE; ++RI) { + blockCSI.push_back(CSI[*RI]); + } + assert(blockCSI.size() > 0 && + "Could not find callee saved register info"); + + // If MBB is empty and needs restores, insert at the _beginning_. + if (MBB->empty()) { + I = MBB->begin(); + } else { + I = MBB->end(); + --I; + + // Skip over all terminator instructions, which are part of the + // return sequence. + if (! 
I->isTerminator()) { + ++I; + } else { + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->isTerminator()) + I = I2; + } + } + + bool AtStart = I == MBB->begin(); + MachineBasicBlock::iterator BeforeI = I; + if (!AtStart) + --BeforeI; + + // Restore all registers immediately before the return and any + // terminators that precede it. + for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, + blockCSI[i].getFrameIdx(), + RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } + } + } +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +static inline void +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, + bool StackGrowsDown, int64_t &Offset, + unsigned &MaxAlign) { + // If the stack grows down, add the object size to find the lowest address. + if (StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + if (StackGrowsDown) { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + } else { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); + } +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. +/// +void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + // Start at the beginning of the local area. + // The Offset is the distance from the stack top in the direction + // of stack growth -- so it's always nonnegative. + int LocalAreaOffset = TFI.getOffsetOfLocalArea(); + if (StackGrowsDown) + LocalAreaOffset = -LocalAreaOffset; + assert(LocalAreaOffset >= 0 + && "Local area offset should be in direction of stack growth"); + int64_t Offset = LocalAreaOffset; + + // If there are fixed sized objects that are preallocated in the local area, + // non-fixed objects can't be allocated right at the start of local area. + // We currently don't support filling in holes in between fixed sized + // objects, so we adjust 'Offset' to point to the end of last fixed sized + // preallocated object. + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { + int64_t FixedOff; + if (StackGrowsDown) { + // The maximum distance from the stack pointer is at lower address of + // the object -- which is given by offset. For down growing stack + // the offset is negative, so we negate the offset to get the distance. 
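+      // (e.g. a fixed object at offset -16 on a down-growing stack lies 16
+      // bytes into the local area, so FixedOff would be 16.)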
+ FixedOff = -MFI->getObjectOffset(i); + } else { + // The maximum distance from the start pointer is at the upper + // address of the object. + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); + } + if (FixedOff > Offset) Offset = FixedOff; + } + + // First assign frame offsets to stack objects that are used to spill + // callee saved registers. + if (StackGrowsDown) { + for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { + // If the stack grows down, we need to add the size to find the lowest + // address of the object. + Offset += MFI->getObjectSize(i); + + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MFI->setObjectOffset(i, -Offset); // Set the computed offset + } + } else { + int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; + for (int i = MaxCSFI; i >= MinCSFI ; --i) { + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); + } + } + + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Make sure the special register scavenging spill slot is closest to the + // frame pointer if a frame pointer is required. + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)) { + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVector<int, 2>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); + } + + // FIXME: Once this is working, then enable flag will change to a target + // check for whether the frame is large enough to want to use virtual + // frame index registers. Functions which don't want/need this optimization + // will continue to use the existing code path. + if (MFI->getUseLocalStackAllocationBlock()) { + unsigned Align = MFI->getLocalFrameMaxAlign(); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); + + // Resolve offsets for objects in the local block. + for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { + std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); + int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; + DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << + FIOffset << "]\n"); + MFI->setObjectOffset(Entry.first, FIOffset); + } + // Allocate the local block + Offset += MFI->getLocalFrameSize(); + + MaxAlign = std::max(Align, MaxAlign); + } + + // Make sure that the stack protector comes before the local variables on the + // stack. + SmallSet<int, 16> LargeStackObjs; + if (MFI->getStackProtectorIndex() >= 0) { + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, + Offset, MaxAlign); + + // Assign large stack objects first. 
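+    // Placing the large, overflow-prone objects next to the protector makes
+    // it more likely that an overflow tramples the guard value before any
+    // other data.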
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && RS->isScavengingFrameIndex((int)i)) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (!MFI->MayNeedStackProtector(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + LargeStackObjs.insert(i); + } + } + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && RS->isScavengingFrameIndex((int)i)) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (LargeStackObjs.count(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + } + + // Make sure the special register scavenging spill slot is closest to the + // stack pointer. + if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || + !RegInfo->useFPForScavengingIndex(Fn))) { + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVector<int, 2>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); + } + + if (!TFI.targetHandlesStackFrameRounding()) { + // If we have reserved argument space for call sites in the function + // immediately on entry to the current function, count it as part of the + // overall stack size. + if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) + StackAlign = TFI.getStackAlignment(); + else + StackAlign = TFI.getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + } + + // Update frame info to pretend that this is part of the stack... + int64_t StackSize = Offset - LocalAreaOffset; + MFI->setStackSize(StackSize); + NumBytesStackSpace += StackSize; +} + +/// insertPrologEpilogCode - Scan the function for modified callee saved +/// registers, insert spill code for these callee saved registers, then add +/// prolog and epilog code to the function. +/// +void PEI::insertPrologEpilogCode(MachineFunction &Fn) { + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + + // Add prologue to the function... 
+ TFI.emitPrologue(Fn); + + // Add epilogue to restore the callee-save registers in each exiting block + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + // If last instruction is a return instruction, add an epilogue + if (!I->empty() && I->back().isReturn()) + TFI.emitEpilogue(Fn, *I); + } + + // Emit additional code that is required to support segmented stacks, if + // we've been asked for it. This, when linked with a runtime with support + // for segmented stacks (libgcc is one), will result in allocating stack + // space in small chunks instead of one large contiguous block. + if (Fn.getTarget().Options.EnableSegmentedStacks) + TFI.adjustForSegmentedStacks(Fn); + + // Emit additional code that is required to explicitly handle the stack in + // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The + // approach is rather similar to that of Segmented Stacks, but it uses a + // different conditional check and another BIF for allocating more stack + // space. + if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) + TFI.adjustForHiPEPrologue(Fn); +} + +/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical +/// register references and actual offsets. +/// +void PEI::replaceFrameIndices(MachineFunction &Fn) { + if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + + const TargetMachine &TM = Fn.getTarget(); + assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); + bool StackGrowsDown = + TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { +#ifndef NDEBUG + int SPAdjCount = 0; // frame setup / destroy count. +#endif + int SPAdj = 0; // SP offset due to call frame setup / destroy. + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { +#ifndef NDEBUG + // Track whether we see even pairs of them + SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; +#endif + // Remember how much SP has been adjusted to create the call + // frame. + int Size = I->getOperand(0).getImm(); + + if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) + Size = -Size; + + SPAdj += Size; + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = prior(I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); + + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. + else + I = llvm::next(PrevI); + continue; + } + + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (!MI->getOperand(i).isFI()) + continue; + + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. 
We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } + + MI = 0; + break; + } + + if (DoIncr && I != BB->end()) ++I; + + // Update register states. + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + } + + // If we have evenly matched pairs of frame setup / destroy instructions, + // make sure the adjustments come out to zero. If we don't have matched + // pairs, we can't be sure the missing bit isn't in another basic block + // due to a custom inserter playing tricks, so just asserting SPAdj==0 + // isn't sufficient. See tMOVCC on Thumb1, for example. + assert((SPAdjCount || SPAdj == 0) && + "Unbalanced call frame setup / destroy pairs?"); + } +} + +/// scavengeFrameVirtualRegs - Replace all frame index virtual registers +/// with physical registers. Use the register scavenger to find an +/// appropriate register to use. +/// +/// FIXME: Iterating over the instruction stream is unnecessary. We can simply +/// iterate over the vreg use list, which at this point only contains machine +/// operands for which eliminateFrameIndex need a new scratch reg. +void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { + // Run through the instructions and find any virtual registers. + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + RS->enterBasicBlock(BB); + + int SPAdj = 0; + + // The instruction stream may change in the loop, so check BB->end() + // directly. + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + MachineInstr *MI = I; + MachineBasicBlock::iterator J = llvm::next(I); + MachineBasicBlock::iterator P = I == BB->begin() ? + MachineBasicBlock::iterator(NULL) : llvm::prior(I); + + // RS should process this instruction before we might scavenge at this + // location. This is because we might be replacing a virtual register + // defined by this instruction, and if so, registers killed by this + // instruction are available, and defined registers are not. + RS->forward(I); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).isReg()) { + MachineOperand &MO = MI->getOperand(i); + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // Scavenge a new scratch register + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); + + ++NumScavengedRegs; + + // Replace this reference to the virtual register with the + // scratch register. + assert (ScratchReg && "Missing scratch register!"); + Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); + + // Because this instruction was processed by the RS before this + // register was allocated, make sure that the RS now records the + // register as being used. 
+          RS->setUsed(ScratchReg);
+        }
+      }
+
+      // If the scavenger needed to use one of its spill slots, the
+      // spill code will have been inserted in between I and J. This is a
+      // problem because we need the spill code before I: Move I to just
+      // prior to J.
+      if (I != llvm::prior(J)) {
+        BB->splice(J, BB, I);
+
+        // Before we move I, we need to prepare the RS to visit I again.
+        // Specifically, RS will assert if it sees uses of registers that
+        // it believes are undefined. Because we have already processed
+        // register kills in I, when it visits I again, it will believe that
+        // those registers are undefined. To avoid this situation, unprocess
+        // the instruction I.
+        assert(RS->getCurrentPosition() == I &&
+          "The register scavenger has an unexpected position");
+        I = P;
+        RS->unprocess(P);
+
+        // RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I));
+      } else
+        ++I;
+    }
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
new file mode 100644
index 0000000..87fff9a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
@@ -0,0 +1,173 @@
+//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This pass also implements a shrink wrapping variant of prolog/epilog
+// insertion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PEI_H
+#define LLVM_CODEGEN_PEI_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+  class RegScavenger;
+  class MachineBasicBlock;
+
+  class PEI : public MachineFunctionPass {
+  public:
+    static char ID;
+    PEI() : MachineFunctionPass(ID) {
+      initializePEIPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+    /// frame indexes with appropriate references.
+    ///
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+  private:
+    RegScavenger *RS;
+
+    // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+    // stack frame indexes.
+    unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+    // Analysis info for spill/restore placement.
+    // "CSR": "callee saved register".
+
+    // CSRegSet contains indices into the Callee Saved Register Info
+    // vector built by calculateCalleeSavedRegisters() and accessed
+    // via MF.getFrameInfo()->getCalleeSavedInfo().
+    typedef SparseBitVector<> CSRegSet;
+
+    // CSRegBlockMap maps MachineBasicBlocks to sets of callee
+    // saved register indices.
+    typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
+
+    // Set and maps for computing CSR spill/restore placement:
+    // used in function (UsedCSRegs)
+    // used in a basic block (CSRUsed)
+    // anticipatable in a basic block (Antic{In,Out})
+    // available in a basic block (Avail{In,Out})
+    // to be spilled at the entry to a basic block (CSRSave)
+    // to be restored at the end of a basic block (CSRRestore)
+    CSRegSet UsedCSRegs;
+    CSRegBlockMap CSRUsed;
+    CSRegBlockMap AnticIn, AnticOut;
+    CSRegBlockMap AvailIn, AvailOut;
+    CSRegBlockMap CSRSave;
+    CSRegBlockMap CSRRestore;
+
+    // Entry and return blocks of the current function.
+    MachineBasicBlock* EntryBlock;
+    SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
+
+    // Map of MBBs to top level MachineLoops.
+    DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
+
+    // Flag to control shrink wrapping per-function:
+    // the pass may choose to skip shrink wrapping for certain
+    // functions.
+    bool ShrinkWrapThisFunction;
+
+    // Flag to control whether to use the register scavenger to resolve
+    // frame index materialization registers. Set according to
+    // TRI->requiresFrameIndexScavenging() for the current function.
+    bool FrameIndexVirtualScavenging;
+
+#ifndef NDEBUG
+    // Machine function handle.
+    MachineFunction* MF;
+
+    // Flag indicating that the current function
+    // has at least one "short" path in the machine
+    // CFG from the entry block to an exit block.
+    bool HasFastExitPath;
+#endif
+
+    bool calculateSets(MachineFunction &Fn);
+    bool calcAnticInOut(MachineBasicBlock* MBB);
+    bool calcAvailInOut(MachineBasicBlock* MBB);
+    void calculateAnticAvail(MachineFunction &Fn);
+    bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+                              SmallVector<MachineBasicBlock*, 4>& blks);
+    bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+    bool calcSpillPlacements(MachineBasicBlock* MBB,
+                             SmallVector<MachineBasicBlock*, 4> &blks,
+                             CSRegBlockMap &prevSpills);
+    bool calcRestorePlacements(MachineBasicBlock* MBB,
+                               SmallVector<MachineBasicBlock*, 4> &blks,
+                               CSRegBlockMap &prevRestores);
+    void placeSpillsAndRestores(MachineFunction &Fn);
+    void placeCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateCallsInformation(MachineFunction &Fn);
+    void calculateCalleeSavedRegisters(MachineFunction &Fn);
+    void insertCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateFrameObjectOffsets(MachineFunction &Fn);
+    void replaceFrameIndices(MachineFunction &Fn);
+    void scavengeFrameVirtualRegs(MachineFunction &Fn);
+    void insertPrologEpilogCode(MachineFunction &Fn);
+
+    // Initialize DFA sets, called before iterations.
+    void clearAnticAvailSets();
+    // Clear all sets constructed by shrink wrapping.
+    void clearAllSets();
+
+    // Initialize all shrink wrapping data.
+    void initShrinkWrappingInfo();
+
+    // Conveniences for dealing with machine loops.
+    MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+    MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+    // Propagate CSRs used in MBB to all MBBs of loop LP.
+    void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+    // Convenience for recognizing return blocks.
+    bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+    // Debugging methods.
+
+    // Mark this function as having fast exit paths.
+    void findFastExitPath();
+
+    // Verify placement of spills/restores.
+ void verifySpillRestorePlacement(); + + std::string getBasicBlockName(const MachineBasicBlock* MBB); + std::string stringifyCSRegSet(const CSRegSet& s); + void dumpSet(const CSRegSet& s); + void dumpUsed(MachineBasicBlock* MBB); + void dumpAllUsed(); + void dumpSets(MachineBasicBlock* MBB); + void dumpSets1(MachineBasicBlock* MBB); + void dumpAllSets(); + void dumpSRSets(); +#endif + + }; +} // End llvm namespace +#endif diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp new file mode 100644 index 0000000..8564911 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp @@ -0,0 +1,132 @@ +//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PseudoSourceValue class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/raw_ostream.h" +#include <map> +using namespace llvm; + +namespace { +struct PSVGlobalsTy { + // PseudoSourceValues are immutable so don't need locking. + const PseudoSourceValue PSVs[4]; + sys::Mutex Lock; // Guards FSValues, but not the values inside it. + std::map<int, const PseudoSourceValue *> FSValues; + + PSVGlobalsTy() : PSVs() {} + ~PSVGlobalsTy() { + for (std::map<int, const PseudoSourceValue *>::iterator + I = FSValues.begin(), E = FSValues.end(); I != E; ++I) { + delete I->second; + } + } +}; + +static ManagedStatic<PSVGlobalsTy> PSVGlobals; + +} // anonymous namespace + +const PseudoSourceValue *PseudoSourceValue::getStack() +{ return &PSVGlobals->PSVs[0]; } +const PseudoSourceValue *PseudoSourceValue::getGOT() +{ return &PSVGlobals->PSVs[1]; } +const PseudoSourceValue *PseudoSourceValue::getJumpTable() +{ return &PSVGlobals->PSVs[2]; } +const PseudoSourceValue *PseudoSourceValue::getConstantPool() +{ return &PSVGlobals->PSVs[3]; } + +static const char *const PSVNames[] = { + "Stack", + "GOT", + "JumpTable", + "ConstantPool" +}; + +// FIXME: THIS IS A HACK!!!! +// Eventually these should be uniqued on LLVMContext rather than in a managed +// static. For now, we can safely use the global context for the time being to +// squeak by. 
+PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
+  Value(Type::getInt8PtrTy(getGlobalContext()),
+        Subclass) {}
+
+void PseudoSourceValue::printCustom(raw_ostream &O) const {
+  O << PSVNames[this - PSVGlobals->PSVs];
+}
+
+const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
+  PSVGlobalsTy &PG = *PSVGlobals;
+  sys::ScopedLock locked(PG.Lock);
+  const PseudoSourceValue *&V = PG.FSValues[FI];
+  if (!V)
+    V = new FixedStackPseudoSourceValue(FI);
+  return V;
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+  if (this == getStack())
+    return false;
+  if (this == getGOT() ||
+      this == getConstantPool() ||
+      this == getJumpTable())
+    return true;
+  llvm_unreachable("Unknown PseudoSourceValue!");
+}
+
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+  if (this == getStack() ||
+      this == getGOT() ||
+      this == getConstantPool() ||
+      this == getJumpTable())
+    return false;
+  llvm_unreachable("Unknown PseudoSourceValue!");
+}
+
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+  if (this == getGOT() ||
+      this == getConstantPool() ||
+      this == getJumpTable())
+    return false;
+  return true;
+}
+
+bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+  return MFI && MFI->isImmutableObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+  // Negative frame indices are used for special things that don't
+  // appear in LLVM IR. Non-negative indices may be used for things
+  // like static allocas.
+  if (!MFI)
+    return FI >= 0;
+  // Spill slots should not alias others.
+  return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+  if (!MFI)
+    return true;
+  // Spill slots will not alias any LLVM IR value.
+  return !MFI->isSpillSlotObjectIndex(FI);
+}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+  OS << "FixedStack" << FI;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
new file mode 100644
index 0000000..c035590
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -0,0 +1,145 @@
+//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class which provides common functionality
+// for LiveIntervalUnion-based register allocators.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "RegAllocBase.h" +#include "Spiller.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#ifndef NDEBUG +#include "llvm/ADT/SparseBitVector.h" +#endif +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" + +using namespace llvm; + +STATISTIC(NumNewQueued , "Number of new live ranges queued"); + +// Temporary verification option until we can put verification inside +// MachineVerifier. +static cl::opt<bool, true> +VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), + cl::desc("Verify during register allocation")); + +const char *RegAllocBase::TimerGroupName = "Register Allocation"; +bool RegAllocBase::VerifyEnabled = false; + +//===----------------------------------------------------------------------===// +// RegAllocBase Implementation +//===----------------------------------------------------------------------===// + +void RegAllocBase::init(VirtRegMap &vrm, + LiveIntervals &lis, + LiveRegMatrix &mat) { + TRI = &vrm.getTargetRegInfo(); + MRI = &vrm.getRegInfo(); + VRM = &vrm; + LIS = &lis; + Matrix = &mat; + MRI->freezeReservedRegs(vrm.getMachineFunction()); + RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); +} + +// Visit all the live registers. If they are already assigned to a physical +// register, unify them with the corresponding LiveIntervalUnion, otherwise push +// them on the priority queue for later assignment. +void RegAllocBase::seedLiveRegs() { + NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + enqueue(&LIS->getInterval(Reg)); + } +} + +// Top-level driver to manage the queue of unassigned VirtRegs and call the +// selectOrSplit implementation. +void RegAllocBase::allocatePhysRegs() { + seedLiveRegs(); + + // Continue assigning vregs one at a time to available physical registers. + while (LiveInterval *VirtReg = dequeue()) { + assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); + + // Unused registers can appear when the spiller coalesces snippets. + if (MRI->reg_nodbg_empty(VirtReg->reg)) { + DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + LIS->removeInterval(VirtReg->reg); + continue; + } + + // Invalidate all interference queries, live ranges could have changed. + Matrix->invalidateVirtRegs(); + + // selectOrSplit requests the allocator to return an available physical + // register if possible and populate a list of new live intervals that + // result from splitting. + DEBUG(dbgs() << "\nselectOrSplit " + << MRI->getRegClass(VirtReg->reg)->getName() + << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n'); + typedef SmallVector<LiveInterval*, 4> VirtRegVec; + VirtRegVec SplitVRegs; + unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + + if (AvailablePhysReg == ~0u) { + // selectOrSplit failed to find a register! 
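+      // Report the failure, but then force an arbitrary assignment below so
+      // that compilation can continue with a consistent VirtRegMap.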
+ const char *Msg = "ran out of registers during register allocation"; + // Probably caused by an inline asm. + MachineInstr *MI; + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); + (MI = I.skipInstruction());) + if (MI->isInlineAsm()) + break; + if (MI) + MI->emitError(Msg); + else + report_fatal_error(Msg); + // Keep going after reporting the error. + VRM->assignVirt2Phys(VirtReg->reg, + RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); + continue; + } + + if (AvailablePhysReg) + Matrix->assign(*VirtReg, AvailablePhysReg); + + for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); + I != E; ++I) { + LiveInterval *SplitVirtReg = *I; + assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); + if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + LIS->removeInterval(SplitVirtReg->reg); + continue; + } + DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + "expect split value in virtual register"); + enqueue(SplitVirtReg); + ++NumNewQueued; + } + } +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h new file mode 100644 index 0000000..064e40f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h @@ -0,0 +1,108 @@ +//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RegAllocBase class, which is the skeleton of a basic +// register allocation algorithm and interface for extending it. It provides the +// building blocks on which to construct other experimental allocators and test +// the validity of two principles: +// +// - If virtual and physical register liveness is modeled using intervals, then +// on-the-fly interference checking is cheap. Furthermore, interferences can be +// lazily cached and reused. +// +// - Register allocation complexity, and generated code performance is +// determined by the effectiveness of live range splitting rather than optimal +// coloring. +// +// Following the first principle, interfering checking revolves around the +// LiveIntervalUnion data structure. +// +// To fulfill the second principle, the basic allocator provides a driver for +// incremental splitting. It essentially punts on the problem of register +// coloring, instead driving the assignment of virtual to physical registers by +// the cost of splitting. The basic allocator allows for heuristic reassignment +// of registers, if a more sophisticated allocator chooses to do that. +// +// This framework provides a way to engineer the compile time vs. code +// quality trade-off without relying on a particular theoretical solver. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCBASE
+#define LLVM_CODEGEN_REGALLOCBASE
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+
+namespace llvm {
+
+template<typename T> class SmallVectorImpl;
+class TargetRegisterInfo;
+class VirtRegMap;
+class LiveIntervals;
+class LiveRegMatrix;
+class Spiller;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They must also override enqueue/dequeue to provide an
+/// assignment order.
+class RegAllocBase {
+protected:
+  const TargetRegisterInfo *TRI;
+  MachineRegisterInfo *MRI;
+  VirtRegMap *VRM;
+  LiveIntervals *LIS;
+  LiveRegMatrix *Matrix;
+  RegisterClassInfo RegClassInfo;
+
+  RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {}
+
+  virtual ~RegAllocBase() {}
+
+  // A RegAlloc pass should call this before allocatePhysRegs.
+  void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
+
+  // The top-level driver. The output is a VirtRegMap that is updated with
+  // physical register assignments.
+  void allocatePhysRegs();
+
+  // Get a temporary reference to a Spiller instance.
+  virtual Spiller &spiller() = 0;
+
+  /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+  virtual void enqueue(LiveInterval *LI) = 0;
+
+  /// dequeue - Return the next unassigned register, or NULL.
+  virtual LiveInterval *dequeue() = 0;
+
+  // A RegAlloc pass should override this to provide the allocation heuristics.
+  // Each call must guarantee forward progress by returning an available PhysReg
+  // or new set of split live virtual registers. It is up to the splitter to
+  // converge quickly toward fully spilled live ranges.
+  virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+                                 SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+
+  // Use this group name for NamedRegionTimer.
+  static const char *TimerGroupName;
+
+public:
+  /// VerifyEnabled - True when -verify-regalloc is given.
+  static bool VerifyEnabled;
+
+private:
+  void seedLiveRegs();
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 0000000..0b6dc68
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,293 @@
+//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "AllocationOrder.h"
+#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cstdlib>
+#include <queue>
+
+using namespace llvm;
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+                                      createBasicRegisterAllocator);
+
+namespace {
+  struct CompSpillWeight {
+    bool operator()(LiveInterval *A, LiveInterval *B) const {
+      return A->weight < B->weight;
+    }
+  };
+}
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and comparing
+/// the speed of the basic algorithm against other styles of allocators.
+class RABasic : public MachineFunctionPass, public RegAllocBase
+{
+  // context
+  MachineFunction *MF;
+
+  // state
+  std::auto_ptr<Spiller> SpillerInstance;
+  std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
+                      CompSpillWeight> Queue;
+
+  // Scratch space. Allocated here to avoid repeated malloc calls in
+  // selectOrSplit().
+  BitVector UsableRegs;
+
+public:
+  RABasic();
+
+  /// Return the pass name.
+  virtual const char* getPassName() const {
+    return "Basic Register Allocator";
+  }
+
+  /// RABasic analysis usage.
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  virtual void releaseMemory();
+
+  virtual Spiller &spiller() { return *SpillerInstance; }
+
+  virtual float getPriority(LiveInterval *LI) { return LI->weight; }
+
+  virtual void enqueue(LiveInterval *LI) {
+    Queue.push(LI);
+  }
+
+  virtual LiveInterval *dequeue() {
+    if (Queue.empty())
+      return 0;
+    LiveInterval *LI = Queue.top();
+    Queue.pop();
+    return LI;
+  }
+
+  virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+                                 SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+  /// Perform register allocation.
+  virtual bool runOnMachineFunction(MachineFunction &mf);
+
+  // Helper for spilling all live virtual registers currently unified under
+  // PhysReg that interfere with the most recently queried VirtReg. Return
+  // true if spilling was successful, and append any new spilled/split
+  // intervals to SplitVRegs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); + + static char ID; +}; + +char RABasic::ID = 0; + +} // end anonymous namespace + +RABasic::RABasic(): MachineFunctionPass(ID) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); +} + +void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); + AU.addRequired<CalculateSpillWeights>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); + AU.addRequiredID(MachineDominatorsID); + AU.addPreservedID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<VirtRegMap>(); + AU.addPreserved<VirtRegMap>(); + AU.addRequired<LiveRegMatrix>(); + AU.addPreserved<LiveRegMatrix>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RABasic::releaseMemory() { + SpillerInstance.reset(0); +} + + +// Spill or split all live virtual registers currently unified under PhysReg +// that interfere with VirtReg. The newly spilled or split live intervals are +// returned by appending them to SplitVRegs. +bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs) { + // Record each interference and determine if all are spillable before mutating + // either the union or live intervals. + SmallVector<LiveInterval*, 8> Intfs; + + // Collect interferences assigned to any alias of the physical register. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + Q.collectInterferingVRegs(); + if (Q.seenUnspillableVReg()) + return false; + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + if (!Intf->isSpillable() || Intf->weight > VirtReg.weight) + return false; + Intfs.push_back(Intf); + } + } + DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) << + " interferences with " << VirtReg << "\n"); + assert(!Intfs.empty() && "expected interference"); + + // Spill each interfering vreg allocated to PhysReg or an alias. + for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { + LiveInterval &Spill = *Intfs[i]; + + // Skip duplicates. + if (!VRM->hasPhys(Spill.reg)) + continue; + + // Deallocate the interfering vreg by removing it from the union. + // A LiveInterval instance may not be in a union during modification! + Matrix->unassign(Spill); + + // Spill the extracted interval. 
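+    // (LiveRangeEdit, constructed below, records any intervals the spiller
+    // creates in SplitVRegs so the caller can requeue them.)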
+    LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
+    spiller().spill(LRE);
+  }
+  return true;
+}
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do
+// a single interference test for each register in the correct class until we
+// find an available register. So, the number of interference tests in the
+// worst case is |vregs| * |machineregs|. And since the number of interference
+// tests is minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
+unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
+                                SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+  // Populate a list of physical register spill candidates.
+  SmallVector<unsigned, 8> PhysRegSpillCands;
+
+  // Check for an available register in this class.
+  AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+  while (unsigned PhysReg = Order.next()) {
+    // Check for interference in PhysReg
+    switch (Matrix->checkInterference(VirtReg, PhysReg)) {
+    case LiveRegMatrix::IK_Free:
+      // PhysReg is available, allocate it.
+      return PhysReg;
+
+    case LiveRegMatrix::IK_VirtReg:
+      // Only virtual registers in the way, we may be able to spill them.
+      PhysRegSpillCands.push_back(PhysReg);
+      continue;
+
+    default:
+      // RegMask or RegUnit interference.
+      continue;
+    }
+  }
+
+  // Try to spill another interfering reg with less spill weight.
+  for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+       PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+    if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs))
+      continue;
+
+    assert(!Matrix->checkInterference(VirtReg, *PhysRegI) &&
+           "Interference after spill.");
+    // Tell the caller to allocate to this newly freed physical register.
+    return *PhysRegI;
+  }
+
+  // No other spill candidates were found, so spill the current VirtReg.
+  DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+  if (!VirtReg.isSpillable())
+    return ~0u;
+  LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
+  spiller().spill(LRE);
+
+  // The live virtual register requesting allocation was spilled, so tell
+  // the caller not to allocate anything during this round.
+  return 0;
+}
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+  DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+               << "********** Function: "
+               << mf.getName() << '\n');
+
+  MF = &mf;
+  RegAllocBase::init(getAnalysis<VirtRegMap>(),
+                     getAnalysis<LiveIntervals>(),
+                     getAnalysis<LiveRegMatrix>());
+  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+
+  allocatePhysRegs();
+
+  // Diagnostic output before rewriting
+  DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+  releaseMemory();
+  return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator()
+{
+  return new RABasic();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
new file mode 100644
index 0000000..bb9c05c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -0,0 +1,1117 @@
+//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This register allocator allocates registers to a basic block at a time, +// attempting to keep values in registers and reusing registers as appropriate. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumCopies, "Number of copies coalesced"); + +static RegisterRegAlloc + fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); + +namespace { + class RAFast : public MachineFunctionPass { + public: + static char ID; + RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), + isBulkSpilling(false) {} + private: + const TargetMachine *TM; + MachineFunction *MF; + MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + RegisterClassInfo RegClassInfo; + + // Basic block currently being allocated. + MachineBasicBlock *MBB; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; + + // Everything we know about a live virtual register. + struct LiveReg { + MachineInstr *LastUse; // Last instr to use reg. + unsigned VirtReg; // Virtual register number. + unsigned PhysReg; // Currently held here. + unsigned short LastOpNum; // OpNum on LastUse. + bool Dirty; // Register needs spill. + + explicit LiveReg(unsigned v) + : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {} + + unsigned getSparseSetIndex() const { + return TargetRegisterInfo::virtReg2Index(VirtReg); + } + }; + + typedef SparseSet<LiveReg> LiveRegMap; + + // LiveVirtRegs - This map contains entries for each virtual register + // that is currently available in a physical register. + LiveRegMap LiveVirtRegs; + + DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap; + + // RegState - Track the state of a physical register. + enum RegState { + // A disabled register is not available for allocation, but an alias may + // be in use. A register can only be moved out of the disabled state if + // all aliases are disabled. + regDisabled, + + // A free register is not currently in use and can be allocated + // immediately without checking aliases. + regFree, + + // A reserved register has been assigned explicitly (e.g., setting up a + // call parameter), and it remains reserved until it is used. 
+      regReserved
+
+      // A register state may also be a virtual register number, indicating
+      // that the physical register is currently allocated to a virtual
+      // register. In that case, LiveVirtRegs contains the inverse mapping.
+    };
+
+    // PhysRegState - One of the RegState enums, or a virtreg.
+    std::vector<unsigned> PhysRegState;
+
+    // Set of register units.
+    typedef SparseSet<unsigned> UsedInInstrSet;
+
+    // Set of register units that are used in the current instruction, and so
+    // cannot be allocated.
+    UsedInInstrSet UsedInInstr;
+
+    // Mark a physreg as used in this instruction.
+    void markRegUsedInInstr(unsigned PhysReg) {
+      for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+        UsedInInstr.insert(*Units);
+    }
+
+    // Check if a physreg or any of its aliases are used in this instruction.
+    bool isRegUsedInInstr(unsigned PhysReg) const {
+      for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+        if (UsedInInstr.count(*Units))
+          return true;
+      return false;
+    }
+
+    // SkippedInstrs - Descriptors of instructions whose clobber list was
+    // ignored because all registers were spilled. It is still necessary to
+    // mark all the clobbered registers as used by the function.
+    SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
+
+    // isBulkSpilling - This flag is set when LiveRegMap will be cleared
+    // completely after spilling all live registers. LiveRegMap entries should
+    // not be erased.
+    bool isBulkSpilling;
+
+    enum {
+      spillClean = 1,
+      spillDirty = 100,
+      spillImpossible = ~0u
+    };
+  public:
+    virtual const char *getPassName() const {
+      return "Fast Register Allocator";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    bool runOnMachineFunction(MachineFunction &Fn);
+    void AllocateBasicBlock();
+    void handleThroughOperands(MachineInstr *MI,
+                               SmallVectorImpl<unsigned> &VirtDead);
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+    bool isLastUseOfLocalReg(MachineOperand&);
+
+    void addKillFlag(const LiveReg&);
+    void killVirtReg(LiveRegMap::iterator);
+    void killVirtReg(unsigned VirtReg);
+    void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+    void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+
+    void usePhysReg(MachineOperand&);
+    void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
+    unsigned calcSpillCost(unsigned PhysReg) const;
+    void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
+    LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
+      return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+    }
+    LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
+      return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+    }
+    LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
+    LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator,
+                                      unsigned Hint);
+    LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
+                                       unsigned VirtReg, unsigned Hint);
+    LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+                                       unsigned VirtReg, unsigned Hint);
+    void spillAll(MachineBasicBlock::iterator MI);
+    bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+  };
+  char RAFast::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { + // Find the location Reg would belong... + int SS = StackSlotForVirtReg[VirtReg]; + if (SS != -1) + return SS; // Already has space allocated? + + // Allocate a new stack object for this spill location... + int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + + // Assign the slot. + StackSlotForVirtReg[VirtReg] = FrameIdx; + return FrameIdx; +} + +/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to +/// its virtual register, and it is guaranteed to be a block-local register. +/// +bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { + // If the register has ever been spilled or reloaded, we conservatively assume + // it is a global register used in multiple blocks. + if (StackSlotForVirtReg[MO.getReg()] != -1) + return false; + + // Check that the use/def chain has exactly one operand - MO. + MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); + if (&I.getOperand() != &MO) + return false; + return ++I == MRI->reg_nodbg_end(); +} + +/// addKillFlag - Set kill flags on last use of a virtual register. +void RAFast::addKillFlag(const LiveReg &LR) { + if (!LR.LastUse) return; + MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); + if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { + if (MO.getReg() == LR.PhysReg) + MO.setIsKill(); + else + LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true); + } +} + +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(LiveRegMap::iterator LRI) { + addKillFlag(*LRI); + assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg && + "Broken RegState mapping"); + PhysRegState[LRI->PhysReg] = regFree; + // Erase from LiveVirtRegs unless we're spilling in bulk. + if (!isBulkSpilling) + LiveVirtRegs.erase(LRI); +} + +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "killVirtReg needs a virtual register"); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + if (LRI != LiveVirtRegs.end()) + killVirtReg(LRI); +} + +/// spillVirtReg - This method spills the value specified by VirtReg into the +/// corresponding stack slot if needed. +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Spilling a physical register is illegal!"); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); + spillVirtReg(MI, LRI); +} + +/// spillVirtReg - Do the actual work of spilling. +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, + LiveRegMap::iterator LRI) { + LiveReg &LR = *LRI; + assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping"); + + if (LR.Dirty) { + // If this physreg is used by the instruction, we want to kill it on the + // instruction, not on the spill. 
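+    // (SpillKill below is false exactly when MI is the recorded last use, so
+    // the kill flag stays on MI rather than moving to the spill store.)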
+    bool SpillKill = LR.LastUse != MI;
+    LR.Dirty = false;
+    DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI)
+                 << " in " << PrintReg(LR.PhysReg, TRI));
+    const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg);
+    int FI = getStackSpaceFor(LRI->VirtReg, RC);
+    DEBUG(dbgs() << " to stack slot #" << FI << "\n");
+    TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
+    ++NumStores; // Update statistics
+
+    // If this register is used by a DBG_VALUE, insert a new DBG_VALUE that
+    // identifies the spilled location as the place to find the corresponding
+    // variable's value.
+    SmallVector<MachineInstr *, 4> &LRIDbgValues =
+      LiveDbgValueMap[LRI->VirtReg];
+    for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+      MachineInstr *DBG = LRIDbgValues[li];
+      const MDNode *MDPtr =
+        DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
+      int64_t Offset = 0;
+      if (DBG->getOperand(1).isImm())
+        Offset = DBG->getOperand(1).getImm();
+      DebugLoc DL;
+      if (MI == MBB->end()) {
+        // If MI is at basic block end then use last instruction's location.
+        MachineBasicBlock::iterator EI = MI;
+        DL = (--EI)->getDebugLoc();
+      }
+      else
+        DL = MI->getDebugLoc();
+      if (MachineInstr *NewDV =
+          TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) {
+        MachineBasicBlock *MBB = DBG->getParent();
+        MBB->insert(MI, NewDV);
+        DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
+      }
+    }
+    // Now that this register is spilled, there should not be any DBG_VALUE
+    // pointing to it; they all point to the spilled value instead.
+    LRIDbgValues.clear();
+    if (SpillKill)
+      LR.LastUse = 0; // Don't kill register again
+  }
+  killVirtReg(LRI);
+}
+
+/// spillAll - Spill all dirty virtregs without killing them.
+void RAFast::spillAll(MachineBasicBlock::iterator MI) {
+  if (LiveVirtRegs.empty()) return;
+  isBulkSpilling = true;
+  // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
+  // of spilling here is deterministic, if arbitrary.
+  for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
+       i != e; ++i)
+    spillVirtReg(MI, i);
+  LiveVirtRegs.clear();
+  isBulkSpilling = false;
+}
+
+/// usePhysReg - Handle the direct use of a physical register.
+/// Check that the register is not used by a virtreg.
+/// Kill the physreg, marking it free.
+/// This may add implicit kills to MO->getParent() and invalidate MO.
+void RAFast::usePhysReg(MachineOperand &MO) {
+  unsigned PhysReg = MO.getReg();
+  assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+         "Bad usePhysReg operand");
+  markRegUsedInInstr(PhysReg);
+  switch (PhysRegState[PhysReg]) {
+  case regDisabled:
+    break;
+  case regReserved:
+    PhysRegState[PhysReg] = regFree;
+    // Fall through
+  case regFree:
+    MO.setIsKill();
+    return;
+  default:
+    // The physreg was allocated to a virtual register. That means the value we
+    // wanted has been clobbered.
+    llvm_unreachable("Instruction uses an allocated register");
+  }
+
+  // Maybe a superregister is reserved?
+  for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+    unsigned Alias = *AI;
+    switch (PhysRegState[Alias]) {
+    case regDisabled:
+      break;
+    case regReserved:
+      assert(TRI->isSuperRegister(PhysReg, Alias) &&
+             "Instruction is not using a subregister of a reserved register");
+      // Leave the superregister in the working set.
+      PhysRegState[Alias] = regFree;
+      MO.getParent()->addRegisterKilled(Alias, TRI, true);
+      return;
+    case regFree:
+      if (TRI->isSuperRegister(PhysReg, Alias)) {
+        // Leave the superregister in the working set.
+        MO.getParent()->addRegisterKilled(Alias, TRI, true);
+        return;
+      }
+      // Some other alias was in the working set - clear it.
+      PhysRegState[Alias] = regDisabled;
+      break;
+    default:
+      llvm_unreachable("Instruction uses an alias of an allocated register");
+    }
+  }
+
+  // All aliases are disabled, bring register into working set.
+  PhysRegState[PhysReg] = regFree;
+  MO.setIsKill();
+}
+
+/// definePhysReg - Mark PhysReg as reserved or free after spilling any
+/// virtregs. This is very similar to defineVirtReg except the physreg is
+/// reserved instead of allocated.
+void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
+                           RegState NewState) {
+  markRegUsedInInstr(PhysReg);
+  switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+  case regDisabled:
+    break;
+  default:
+    spillVirtReg(MI, VirtReg);
+    // Fall through.
+  case regFree:
+  case regReserved:
+    PhysRegState[PhysReg] = NewState;
+    return;
+  }
+
+  // This is a disabled register, disable all aliases.
+  PhysRegState[PhysReg] = NewState;
+  for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+    unsigned Alias = *AI;
+    switch (unsigned VirtReg = PhysRegState[Alias]) {
+    case regDisabled:
+      break;
+    default:
+      spillVirtReg(MI, VirtReg);
+      // Fall through.
+    case regFree:
+    case regReserved:
+      PhysRegState[Alias] = regDisabled;
+      if (TRI->isSuperRegister(PhysReg, Alias))
+        return;
+      break;
+    }
+  }
+}
+
+
+// calcSpillCost - Return the cost of spilling/clearing out PhysReg and its
+// aliases so that it is free for allocation.
+// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
+// can be allocated directly.
+// Returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+  if (isRegUsedInInstr(PhysReg)) {
+    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
+    return spillImpossible;
+  }
+  switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+  case regDisabled:
+    break;
+  case regFree:
+    return 0;
+  case regReserved:
+    DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
+                 << PrintReg(PhysReg, TRI) << " is reserved already.\n");
+    return spillImpossible;
+  default: {
+    LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+    assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+    return I->Dirty ? spillDirty : spillClean;
+  }
+  }
+
+  // This is a disabled register, add up cost of aliases.
+  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
+  unsigned Cost = 0;
+  for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+    unsigned Alias = *AI;
+    switch (unsigned VirtReg = PhysRegState[Alias]) {
+    case regDisabled:
+      break;
+    case regFree:
+      ++Cost;
+      break;
+    case regReserved:
+      return spillImpossible;
+    default: {
+      LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+      assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+      Cost += I->Dirty ? spillDirty : spillClean;
+      break;
+    }
+    }
+  }
+  return Cost;
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+/// +void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) { + DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to " + << PrintReg(PhysReg, TRI) << "\n"); + PhysRegState[PhysReg] = LR.VirtReg; + assert(!LR.PhysReg && "Already assigned a physreg"); + LR.PhysReg = PhysReg; +} + +RAFast::LiveRegMap::iterator +RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared"); + assignVirtToPhysReg(*LRI, PhysReg); + return LRI; +} + +/// allocVirtReg - Allocate a physical register for VirtReg. +RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, + LiveRegMap::iterator LRI, + unsigned Hint) { + const unsigned VirtReg = LRI->VirtReg; + + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Can only allocate virtual registers"); + + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + + // Ignore invalid hints. + if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || + !RC->contains(Hint) || !MRI->isAllocatable(Hint))) + Hint = 0; + + // Take hint when possible. + if (Hint) { + // Ignore the hint if we would have to spill a dirty register. + unsigned Cost = calcSpillCost(Hint); + if (Cost < spillDirty) { + if (Cost) + definePhysReg(MI, Hint, regFree); + // definePhysReg may kill virtual registers and modify LiveVirtRegs. + // That invalidates LRI, so run a new lookup for VirtReg. + return assignVirtToPhysReg(VirtReg, Hint); + } + } + + ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC); + + // First try to find a completely free register. + for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ + unsigned PhysReg = *I; + if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { + assignVirtToPhysReg(*LRI, PhysReg); + return LRI; + } + } + + DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " + << RC->getName() << "\n"); + + unsigned BestReg = 0, BestCost = spillImpossible; + for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ + unsigned Cost = calcSpillCost(*I); + DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n"); + DEBUG(dbgs() << "\tCost: " << Cost << "\n"); + DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); + // Cost is 0 when all aliases are already disabled. + if (Cost == 0) { + assignVirtToPhysReg(*LRI, *I); + return LRI; + } + if (Cost < BestCost) + BestReg = *I, BestCost = Cost; + } + + if (BestReg) { + definePhysReg(MI, BestReg, regFree); + // definePhysReg may kill virtual registers and modify LiveVirtRegs. + // That invalidates LRI, so run a new lookup for VirtReg. + return assignVirtToPhysReg(VirtReg, BestReg); + } + + // Nothing we can do. Report an error and keep going with a bad allocation. + MI->emitError("ran out of registers during register allocation"); + definePhysReg(MI, *AO.begin(), regFree); + return assignVirtToPhysReg(VirtReg, *AO.begin()); +} + +/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. +RAFast::LiveRegMap::iterator +RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register"); + LiveRegMap::iterator LRI; + bool New; + tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); + if (New) { + // If there is no hint, peek at the only use of this register. 
+    if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
+        MRI->hasOneNonDBGUse(VirtReg)) {
+      const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg);
+      // It's a copy, use the destination register as a hint.
+      if (UseMI.isCopyLike())
+        Hint = UseMI.getOperand(0).getReg();
+    }
+    LRI = allocVirtReg(MI, LRI, Hint);
+  } else if (LRI->LastUse) {
+    // Redefining a live register - kill at the last use, unless it is this
+    // instruction defining VirtReg multiple times.
+    if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
+      addKillFlag(*LRI);
+  }
+  assert(LRI->PhysReg && "Register not assigned");
+  LRI->LastUse = MI;
+  LRI->LastOpNum = OpNum;
+  LRI->Dirty = true;
+  markRegUsedInInstr(LRI->PhysReg);
+  return LRI;
+}
+
+/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
+RAFast::LiveRegMap::iterator
+RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+                      unsigned VirtReg, unsigned Hint) {
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+         "Not a virtual register");
+  LiveRegMap::iterator LRI;
+  bool New;
+  tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+  MachineOperand &MO = MI->getOperand(OpNum);
+  if (New) {
+    LRI = allocVirtReg(MI, LRI, Hint);
+    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+    int FrameIndex = getStackSpaceFor(VirtReg, RC);
+    DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
+                 << PrintReg(LRI->PhysReg, TRI) << "\n");
+    TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI);
+    ++NumLoads;
+  } else if (LRI->Dirty) {
+    if (isLastUseOfLocalReg(MO)) {
+      DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+      if (MO.isUse())
+        MO.setIsKill();
+      else
+        MO.setIsDead();
+    } else if (MO.isKill()) {
+      DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+      MO.setIsKill(false);
+    } else if (MO.isDead()) {
+      DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+      MO.setIsDead(false);
+    }
+  } else if (MO.isKill()) {
+    // We must remove kill flags from uses of reloaded registers because the
+    // register would be killed immediately, and there might be a second use:
+    //   %foo = OR %x<kill>, %x
+    // This would cause a second reload of %x into a different register.
+    DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+    MO.setIsKill(false);
+  } else if (MO.isDead()) {
+    DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+    MO.setIsDead(false);
+  }
+  assert(LRI->PhysReg && "Register not assigned");
+  LRI->LastUse = MI;
+  LRI->LastOpNum = OpNum;
+  markRegUsedInInstr(LRI->PhysReg);
+  return LRI;
+}
+
+// setPhysReg - Change operand OpNum in MI to refer to PhysReg, considering
+// subregs. This may invalidate any operand pointers.
+// Return true if the operand kills its register.
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
+  MachineOperand &MO = MI->getOperand(OpNum);
+  bool Dead = MO.isDead();
+  if (!MO.getSubReg()) {
+    MO.setReg(PhysReg);
+    return MO.isKill() || Dead;
+  }
+
+  // Handle subregister index.
+  MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+  MO.setSubReg(0);
+
+  // A kill flag implies killing the full register. Add corresponding super
+  // register kill.
+  if (MO.isKill()) {
+    MI->addRegisterKilled(PhysReg, TRI, true);
+    return true;
+  }
+
+  // A <def,read-undef> of a sub-register requires an implicit def of the full
+  // register.
+  if (MO.isDef() && MO.isUndef())
+    MI->addRegisterDefined(PhysReg, TRI);
+
+  return Dead;
+}
+
+// Handle special instruction operands like early clobbers and tied operands
+// when there are additional physreg defines.
+void RAFast::handleThroughOperands(MachineInstr *MI,
+                                   SmallVectorImpl<unsigned> &VirtDead) {
+  DEBUG(dbgs() << "Scanning for through registers:");
+  SmallSet<unsigned, 8> ThroughRegs;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
+        (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+      if (ThroughRegs.insert(Reg))
+        DEBUG(dbgs() << ' ' << PrintReg(Reg));
+    }
+  }
+
+  // If any physreg defines collide with preallocated through registers,
+  // we must spill and reallocate.
+  DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef()) continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+    markRegUsedInInstr(Reg);
+    for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+      if (ThroughRegs.count(PhysRegState[*AI]))
+        definePhysReg(MI, *AI, regFree);
+    }
+  }
+
+  SmallVector<unsigned, 8> PartialDefs;
+  DEBUG(dbgs() << "Allocating tied uses.\n");
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+    if (MO.isUse()) {
+      unsigned DefIdx = 0;
+      if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
+      DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
+                   << DefIdx << ".\n");
+      LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+      unsigned PhysReg = LRI->PhysReg;
+      setPhysReg(MI, i, PhysReg);
+      // Note: we don't update the def operand yet. That would cause the normal
+      // def-scan to attempt spilling.
+    } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+      DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+      // Reload the register, but don't assign to the operand just yet.
+      // That would confuse the later phys-def processing pass.
+      LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+      PartialDefs.push_back(LRI->PhysReg);
+    }
+  }
+
+  DEBUG(dbgs() << "Allocating early clobbers.\n");
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+    if (!MO.isEarlyClobber())
+      continue;
+    // Note: defineVirtReg may invalidate MO.
+    LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+    unsigned PhysReg = LRI->PhysReg;
+    if (setPhysReg(MI, i, PhysReg))
+      VirtDead.push_back(Reg);
+  }
+
+  // Restore UsedInInstr to a state usable for allocating normal virtual uses.
+ UsedInInstr.clear(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) + << " as used in instr\n"); + markRegUsedInInstr(Reg); + } + + // Also mark PartialDefs as used to avoid reallocation. + for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) + markRegUsedInInstr(PartialDefs[i]); +} + +void RAFast::AllocateBasicBlock() { + DEBUG(dbgs() << "\nAllocating " << *MBB); + + PhysRegState.assign(TRI->getNumRegs(), regDisabled); + assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); + + MachineBasicBlock::iterator MII = MBB->begin(); + + // Add live-in registers as live. + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + if (MRI->isAllocatable(*I)) + definePhysReg(MII, *I, regReserved); + + SmallVector<unsigned, 8> VirtDead; + SmallVector<MachineInstr*, 32> Coalesced; + + // Otherwise, sequentially allocate each instruction in the MBB. + while (MII != MBB->end()) { + MachineInstr *MI = MII++; + const MCInstrDesc &MCID = MI->getDesc(); + DEBUG({ + dbgs() << "\n>> " << *MI << "Regs:"; + for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { + if (PhysRegState[Reg] == regDisabled) continue; + dbgs() << " " << TRI->getName(Reg); + switch(PhysRegState[Reg]) { + case regFree: + break; + case regReserved: + dbgs() << "*"; + break; + default: { + dbgs() << '=' << PrintReg(PhysRegState[Reg]); + LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + if (I->Dirty) + dbgs() << "*"; + assert(I->PhysReg == Reg && "Bad inverse map"); + break; + } + } + } + dbgs() << '\n'; + // Check that LiveVirtRegs is the inverse. + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), + e = LiveVirtRegs.end(); i != e; ++i) { + assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && + "Bad map key"); + assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && + "Bad map value"); + assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); + } + }); + + // Debug values are not allowed to change codegen in any way. + if (MI->isDebugValue()) { + bool ScanDbgValue = true; + while (ScanDbgValue) { + ScanDbgValue = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + LiveRegMap::iterator LRI = findLiveVirtReg(Reg); + if (LRI != LiveVirtRegs.end()) + setPhysReg(MI, i, LRI->PhysReg); + else { + int SS = StackSlotForVirtReg[Reg]; + if (SS == -1) { + // We can't allocate a physreg for a DebugValue, sorry! + DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); + MO.setReg(0); + } + else { + // Modify DBG_VALUE now that the value is in a spill slot. + int64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = + MI->getOperand(MI->getNumOperands()-1).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << + "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + // Scan NewDV operands from the beginning. 
+ MI = NewDV; + ScanDbgValue = true; + break; + } else { + // We can't allocate a physreg for a DebugValue; sorry! + DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); + MO.setReg(0); + } + } + } + LiveDbgValueMap[Reg].push_back(MI); + } + } + // Next instruction. + continue; + } + + // If this is a copy, we may be able to coalesce. + unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0; + if (MI->isCopy()) { + CopyDst = MI->getOperand(0).getReg(); + CopySrc = MI->getOperand(1).getReg(); + CopyDstSub = MI->getOperand(0).getSubReg(); + CopySrcSub = MI->getOperand(1).getSubReg(); + } + + // Track registers used by instruction. + UsedInInstr.clear(); + + // First scan. + // Mark physreg uses and early clobbers as used. + // Find the end of the virtreg operands + unsigned VirtOpEnd = 0; + bool hasTiedOps = false; + bool hasEarlyClobbers = false; + bool hasPartialRedefs = false; + bool hasPhysDefs = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + // Make sure MRI knows about registers clobbered by regmasks. + if (MO.isRegMask()) { + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + continue; + } + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg) continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + VirtOpEnd = i+1; + if (MO.isUse()) { + hasTiedOps = hasTiedOps || + MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; + } else { + if (MO.isEarlyClobber()) + hasEarlyClobbers = true; + if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) + hasPartialRedefs = true; + } + continue; + } + if (!MRI->isAllocatable(Reg)) continue; + if (MO.isUse()) { + usePhysReg(MO); + } else if (MO.isEarlyClobber()) { + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); + hasEarlyClobbers = true; + } else + hasPhysDefs = true; + } + + // The instruction may have virtual register operands that must be allocated + // the same register at use-time and def-time: early clobbers and tied + // operands. If there are also physical defs, these registers must avoid + // both physical defs and uses, making them more constrained than normal + // operands. + // Similarly, if there are multiple defs and tied operands, we must make + // sure the same register is allocated to uses and defs. + // We didn't detect inline asm tied operands above, so just make this extra + // pass for all inline asm. + if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || + (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { + handleThroughOperands(MI, VirtDead); + // Don't attempt coalescing when we have funny stuff going on. + CopyDst = 0; + // Pretend we have early clobbers so the use operands get marked below. + // This is not necessary for the common case of a single tied use. + hasEarlyClobbers = true; + } + + // Second scan. + // Allocate virtreg uses. + for (unsigned i = 0; i != VirtOpEnd; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (MO.isUse()) { + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); + unsigned PhysReg = LRI->PhysReg; + CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? 
PhysReg : 0;
+        if (setPhysReg(MI, i, PhysReg))
+          killVirtReg(LRI);
+      }
+    }
+
+    for (UsedInInstrSet::iterator
+         I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+      MRI->setRegUnitUsed(*I);
+
+    // Track registers defined by instruction - early clobbers and tied uses at
+    // this point.
+    UsedInInstr.clear();
+    if (hasEarlyClobbers) {
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg()) continue;
+        unsigned Reg = MO.getReg();
+        if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+        // Look for physreg defs and tied uses.
+        if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
+        markRegUsedInInstr(Reg);
+      }
+    }
+
+    unsigned DefOpEnd = MI->getNumOperands();
+    if (MI->isCall()) {
+      // Spill all virtregs before a call. This serves two purposes: 1. If an
+      // exception is thrown, the landing pad is going to expect to find
+      // registers in their spill slots, and 2. we don't have to wade through
+      // all the <imp-def> operands on the call instruction.
+      DefOpEnd = VirtOpEnd;
+      DEBUG(dbgs() << "  Spilling remaining registers before call.\n");
+      spillAll(MI);
+
+      // The imp-defs are skipped below, but we still need to mark those
+      // registers as used by the function.
+      SkippedInstrs.insert(&MCID);
+    }
+
+    // Third scan.
+    // Allocate defs and collect dead defs.
+    for (unsigned i = 0; i != DefOpEnd; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+        continue;
+      unsigned Reg = MO.getReg();
+
+      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+        if (!MRI->isAllocatable(Reg)) continue;
+        definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+                               regFree : regReserved);
+        continue;
+      }
+      LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
+      unsigned PhysReg = LRI->PhysReg;
+      if (setPhysReg(MI, i, PhysReg)) {
+        VirtDead.push_back(Reg);
+        CopyDst = 0; // Cancel coalescing.
+      } else
+        CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
+    }
+
+    // Kill dead defs after the scan to ensure that multiple defs of the same
+    // register are allocated identically. We didn't need to do this for uses
+    // because we are creating our own kill flags, and they are always at the
+    // last use.
+    for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
+      killVirtReg(VirtDead[i]);
+    VirtDead.clear();
+
+    for (UsedInInstrSet::iterator
+         I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+      MRI->setRegUnitUsed(*I);
+
+    if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
+      DEBUG(dbgs() << "-- coalescing: " << *MI);
+      Coalesced.push_back(MI);
+    } else {
+      DEBUG(dbgs() << "<< " << *MI);
+    }
+  }
+
+  // Spill all physical registers holding virtual registers now.
+  DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+  spillAll(MBB->getFirstTerminator());
+
+  // Erase all the coalesced copies. We are delaying it until now because
+  // LiveVirtRegs might refer to the instrs.
+ for (unsigned i = 0, e = Coalesced.size(); i != e; ++i) + MBB->erase(Coalesced[i]); + NumCopies += Coalesced.size(); + + DEBUG(MBB->dump()); +} + +/// runOnMachineFunction - Register allocate the whole function +/// +bool RAFast::runOnMachineFunction(MachineFunction &Fn) { + DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" + << "********** Function: " << Fn.getName() << '\n'); + MF = &Fn; + MRI = &MF->getRegInfo(); + TM = &Fn.getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + MRI->freezeReservedRegs(Fn); + RegClassInfo.runOnMachineFunction(Fn); + UsedInInstr.clear(); + UsedInInstr.setUniverse(TRI->getNumRegUnits()); + + assert(!MRI->isSSA() && "regalloc requires leaving SSA"); + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); + LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); + MBBi != MBBe; ++MBBi) { + MBB = &*MBBi; + AllocateBasicBlock(); + } + + // Add the clobber lists for all the instructions we skipped earlier. + for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator + I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) + if (const uint16_t *Defs = (*I)->getImplicitDefs()) + while (*Defs) + MRI->setPhysRegUsed(*Defs++); + + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers. + MRI->clearVirtRegs(); + + SkippedInstrs.clear(); + StackSlotForVirtReg.clear(); + LiveDbgValueMap.clear(); + return true; +} + +FunctionPass *llvm::createFastRegisterAllocator() { + return new RAFast(); +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp new file mode 100644 index 0000000..6d84176 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -0,0 +1,1791 @@ +//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RAGreedy function pass for register allocation in +// optimized builds. 
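+//
+// RAGreedy extends RegAllocBase with live range splitting, driven by the
+// stage machine described below. It registers itself under the name "greedy"
+// (see the RegisterRegAlloc instance below), so it can be selected with
+// -regalloc=greedy.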
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/Passes.h" +#include "AllocationOrder.h" +#include "InterferenceCache.h" +#include "LiveDebugVariables.h" +#include "RegAllocBase.h" +#include "SpillPlacement.h" +#include "Spiller.h" +#include "SplitKit.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include <queue> + +using namespace llvm; + +STATISTIC(NumGlobalSplits, "Number of split global live ranges"); +STATISTIC(NumLocalSplits, "Number of split local live ranges"); +STATISTIC(NumEvicted, "Number of interferences evicted"); + +static cl::opt<SplitEditor::ComplementSpillMode> +SplitSpillMode("split-spill-mode", cl::Hidden, + cl::desc("Spill mode for splitting live ranges"), + cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), + clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), + clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), + clEnumValEnd), + cl::init(SplitEditor::SM_Partition)); + +static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", + createGreedyRegisterAllocator); + +namespace { +class RAGreedy : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { + + // context + MachineFunction *MF; + + // analyses + SlotIndexes *Indexes; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + EdgeBundles *Bundles; + SpillPlacement *SpillPlacer; + LiveDebugVariables *DebugVars; + + // state + std::auto_ptr<Spiller> SpillerInstance; + std::priority_queue<std::pair<unsigned, unsigned> > Queue; + unsigned NextCascade; + + // Live ranges pass through a number of stages as we try to allocate them. + // Some of the stages may also create new live ranges: + // + // - Region splitting. + // - Per-block splitting. + // - Local splitting. + // - Spilling. + // + // Ranges produced by one of the stages skip the previous stages when they are + // dequeued. This improves performance because we can skip interference checks + // that are unlikely to give any results. It also guarantees that the live + // range splitting algorithm terminates, something that is otherwise hard to + // ensure. + enum LiveRangeStage { + /// Newly created live range that has never been queued. + RS_New, + + /// Only attempt assignment and eviction. Then requeue as RS_Split. + RS_Assign, + + /// Attempt live range splitting if assignment is impossible. + RS_Split, + + /// Attempt more aggressive live range splitting that is guaranteed to make + /// progress. This is used for split products that may not be making + /// progress. + RS_Split2, + + /// Live range will be spilled. No more splitting will be attempted. 
+    RS_Spill,
+
+    /// There is nothing more we can do to this live range. Abort compilation
+    /// if it can't be assigned.
+    RS_Done
+  };
+
+  static const char *const StageName[];
+
+  // RegInfo - Keep additional information about each live range.
+  struct RegInfo {
+    LiveRangeStage Stage;
+
+    // Cascade - Eviction loop prevention. See canEvictInterference().
+    unsigned Cascade;
+
+    RegInfo() : Stage(RS_New), Cascade(0) {}
+  };
+
+  IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
+
+  LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+    return ExtraRegInfo[VirtReg.reg].Stage;
+  }
+
+  void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+    ExtraRegInfo.resize(MRI->getNumVirtRegs());
+    ExtraRegInfo[VirtReg.reg].Stage = Stage;
+  }
+
+  template<typename Iterator>
+  void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+    ExtraRegInfo.resize(MRI->getNumVirtRegs());
+    for (;Begin != End; ++Begin) {
+      unsigned Reg = (*Begin)->reg;
+      if (ExtraRegInfo[Reg].Stage == RS_New)
+        ExtraRegInfo[Reg].Stage = NewStage;
+    }
+  }
+
+  /// Cost of evicting interference.
+  struct EvictionCost {
+    unsigned BrokenHints; ///< Total number of broken hints.
+    float MaxWeight;      ///< Maximum spill weight evicted.
+
+    EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+
+    bool operator<(const EvictionCost &O) const {
+      if (BrokenHints != O.BrokenHints)
+        return BrokenHints < O.BrokenHints;
+      return MaxWeight < O.MaxWeight;
+    }
+  };
+
+  // splitting state.
+  std::auto_ptr<SplitAnalysis> SA;
+  std::auto_ptr<SplitEditor> SE;
+
+  /// Cached per-block interference maps
+  InterferenceCache IntfCache;
+
+  /// All basic blocks where the current register has uses.
+  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+  /// Global live range splitting candidate info.
+  struct GlobalSplitCandidate {
+    // Register intended for assignment, or 0.
+    unsigned PhysReg;
+
+    // SplitKit interval index for this candidate.
+    unsigned IntvIdx;
+
+    // Interference for PhysReg.
+    InterferenceCache::Cursor Intf;
+
+    // Bundles where this candidate should be live.
+    BitVector LiveBundles;
+    SmallVector<unsigned, 8> ActiveBlocks;
+
+    void reset(InterferenceCache &Cache, unsigned Reg) {
+      PhysReg = Reg;
+      IntvIdx = 0;
+      Intf.setPhysReg(Cache, Reg);
+      LiveBundles.clear();
+      ActiveBlocks.clear();
+    }
+
+    // Set B[i] = C for every live bundle where B[i] was NoCand.
+    unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
+      unsigned Count = 0;
+      for (int i = LiveBundles.find_first(); i >= 0;
+           i = LiveBundles.find_next(i))
+        if (B[i] == NoCand) {
+          B[i] = C;
+          Count++;
+        }
+      return Count;
+    }
+  };
+
+  /// Candidate info for each PhysReg in AllocationOrder.
+  /// This vector never shrinks, but grows to the size of the largest register
+  /// class.
+  SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+
+  enum { NoCand = ~0u };
+
+  /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
+  /// NoCand which indicates the stack interval.
+  SmallVector<unsigned, 32> BundleCand;
+
+public:
+  RAGreedy();
+
+  /// Return the pass name.
+  virtual const char* getPassName() const {
+    return "Greedy Register Allocator";
+  }
+
+  /// RAGreedy analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual void releaseMemory(); + virtual Spiller &spiller() { return *SpillerInstance; } + virtual void enqueue(LiveInterval *LI); + virtual LiveInterval *dequeue(); + virtual unsigned selectOrSplit(LiveInterval&, + SmallVectorImpl<LiveInterval*>&); + + /// Perform register allocation. + virtual bool runOnMachineFunction(MachineFunction &mf); + + static char ID; + +private: + bool LRE_CanEraseVirtReg(unsigned); + void LRE_WillShrinkVirtReg(unsigned); + void LRE_DidCloneVirtReg(unsigned, unsigned); + + float calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, float&); + void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); + void growRegion(GlobalSplitCandidate &Cand); + float calcGlobalSplitCost(GlobalSplitCandidate&); + bool calcCompactRegion(GlobalSplitCandidate&); + void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); + void calcGapWeights(unsigned, SmallVectorImpl<float>&); + bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); + bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); + void evictInterference(LiveInterval&, unsigned, + SmallVectorImpl<LiveInterval*>&); + + unsigned tryAssign(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned tryEvict(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&, unsigned = ~0u); + unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned trySplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); +}; +} // end anonymous namespace + +char RAGreedy::ID = 0; + +#ifndef NDEBUG +const char *const RAGreedy::StageName[] = { + "RS_New", + "RS_Assign", + "RS_Split", + "RS_Split2", + "RS_Spill", + "RS_Done" +}; +#endif + +// Hysteresis to use when comparing floats. +// This helps stabilize decisions based on float comparisons. 
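+// A typical use is a comparison of the form NewCost < Hysteresis * OldCost,
+// so a new choice must be roughly 2% better before the allocator switches.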
+const float Hysteresis = 0.98f;
+
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+  return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+  initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+  initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+  initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+  initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+  initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+  initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+  initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
+  initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+  initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<AliasAnalysis>();
+  AU.addPreserved<AliasAnalysis>();
+  AU.addRequired<LiveIntervals>();
+  AU.addPreserved<LiveIntervals>();
+  AU.addRequired<SlotIndexes>();
+  AU.addPreserved<SlotIndexes>();
+  AU.addRequired<LiveDebugVariables>();
+  AU.addPreserved<LiveDebugVariables>();
+  AU.addRequired<LiveStacks>();
+  AU.addPreserved<LiveStacks>();
+  AU.addRequired<CalculateSpillWeights>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addRequired<VirtRegMap>();
+  AU.addPreserved<VirtRegMap>();
+  AU.addRequired<LiveRegMatrix>();
+  AU.addPreserved<LiveRegMatrix>();
+  AU.addRequired<EdgeBundles>();
+  AU.addRequired<SpillPlacement>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+
+//===----------------------------------------------------------------------===//
+//                     LiveRangeEdit delegate methods
+//===----------------------------------------------------------------------===//
+
+bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+  if (VRM->hasPhys(VirtReg)) {
+    Matrix->unassign(LIS->getInterval(VirtReg));
+    return true;
+  }
+  // Unassigned virtreg is probably in the priority queue.
+  // RegAllocBase will erase it after dequeueing.
+  return false;
+}
+
+void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+  if (!VRM->hasPhys(VirtReg))
+    return;
+
+  // Register is assigned, put it back on the queue for reassignment.
+  LiveInterval &LI = LIS->getInterval(VirtReg);
+  Matrix->unassign(LI);
+  enqueue(&LI);
+}
+
+void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+  // Cloning a register we haven't even heard about yet? Just ignore it.
+  if (!ExtraRegInfo.inBounds(Old))
+    return;
+
+  // LRE may clone a virtual register because dead code elimination causes it
+  // to be split into connected components. The new components are much
+  // smaller than the original, so they should get a new chance at being
+  // assigned. Reset the parent to RS_Assign so the clone below inherits that
+  // same stage rather than the parent's old one.
+  ExtraRegInfo[Old].Stage = RS_Assign;
+  ExtraRegInfo.grow(New);
+  ExtraRegInfo[New] = ExtraRegInfo[Old];
+}
+
+void RAGreedy::releaseMemory() {
+  SpillerInstance.reset(0);
+  ExtraRegInfo.clear();
+  GlobalCand.clear();
+}
+
+void RAGreedy::enqueue(LiveInterval *LI) {
+  // Prioritize live ranges by size, assigning larger ranges first.
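+  // (Editor's sketch, not part of the original source: the priority packs
+  // three fields into one unsigned, e.g. for a hypothetical 40-slot hinted
+  // range in the normal round:
+  //
+  //   Prio = (1u << 31)   // not deferred to the RS_Split round
+  //        | (1u << 30)   // has a known physreg preference
+  //        | 40;          // LiveInterval::getSize()
+  //
+  // The register is pushed as ~Reg so that, among equal priorities, the
+  // max-heap pops lower-numbered vregs first and dequeue() can recover the
+  // register with another ~.)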
+ // The queue holds (size, reg) pairs. + const unsigned Size = LI->getSize(); + const unsigned Reg = LI->reg; + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Can only enqueue virtual registers"); + unsigned Prio; + + ExtraRegInfo.grow(Reg); + if (ExtraRegInfo[Reg].Stage == RS_New) + ExtraRegInfo[Reg].Stage = RS_Assign; + + if (ExtraRegInfo[Reg].Stage == RS_Split) { + // Unsplit ranges that couldn't be allocated immediately are deferred until + // everything else has been allocated. + Prio = Size; + } else { + // Everything is allocated in long->short order. Long ranges that don't fit + // should be spilled (or split) ASAP so they don't create interference. + Prio = (1u << 31) + Size; + + // Boost ranges that have a physical register hint. + if (VRM->hasKnownPreference(Reg)) + Prio |= (1u << 30); + } + + Queue.push(std::make_pair(Prio, ~Reg)); +} + +LiveInterval *RAGreedy::dequeue() { + if (Queue.empty()) + return 0; + LiveInterval *LI = &LIS->getInterval(~Queue.top().second); + Queue.pop(); + return LI; +} + + +//===----------------------------------------------------------------------===// +// Direct Assignment +//===----------------------------------------------------------------------===// + +/// tryAssign - Try to assign VirtReg to an available register. +unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + Order.rewind(); + unsigned PhysReg; + while ((PhysReg = Order.next())) + if (!Matrix->checkInterference(VirtReg, PhysReg)) + break; + if (!PhysReg || Order.isHint()) + return PhysReg; + + // PhysReg is available, but there may be a better choice. + + // If we missed a simple hint, try to cheaply evict interference from the + // preferred register. + if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) + if (Order.isHint(Hint)) { + DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); + EvictionCost MaxCost(1); + if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { + evictInterference(VirtReg, Hint, NewVRegs); + return Hint; + } + } + + // Try to evict interference from a cheaper alternative. + unsigned Cost = TRI->getCostPerUse(PhysReg); + + // Most registers have 0 additional cost. + if (!Cost) + return PhysReg; + + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost + << '\n'); + unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); + return CheapReg ? CheapReg : PhysReg; +} + + +//===----------------------------------------------------------------------===// +// Interference eviction +//===----------------------------------------------------------------------===// + +/// shouldEvict - determine if A should evict the assigned live range B. The +/// eviction policy defined by this function together with the allocation order +/// defined by enqueue() decides which registers ultimately end up being split +/// and spilled. +/// +/// Cascade numbers are used to prevent infinite loops if this function is a +/// cyclic relation. +/// +/// @param A The live range to be assigned. +/// @param IsHint True when A is about to be assigned to its preferred +/// register. +/// @param B The live range to be evicted. +/// @param BreaksHint True when B is already assigned to its preferred register. +bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, + LiveInterval &B, bool BreaksHint) { + bool CanSplit = getStage(B) < RS_Spill; + + // Be fairly aggressive about following hints as long as the evictee can be + // split. 
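+  // (Editor's example with made-up weights: a hinted A of weight 2.0 may
+  // evict a splittable, unhinted B of weight 5.0 -- the hint wins outright.
+  // If B is on its own preferred register (BreaksHint) or can no longer be
+  // split, the decision falls through to the plain weight comparison below.)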
+  if (CanSplit && IsHint && !BreaksHint)
+    return true;
+
+  return A.weight > B.weight;
+}
+
+/// canEvictInterference - Return true if all interferences between VirtReg
+/// and PhysReg can be evicted.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint  True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+///                when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+                                    bool IsHint, EvictionCost &MaxCost) {
+  // It is only possible to evict virtual register interference.
+  if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
+    return false;
+
+  // Find VirtReg's cascade number. This will be unassigned if VirtReg was
+  // never involved in an eviction before. If a cascade number was assigned,
+  // deny evicting anything with the same or a newer cascade number. This
+  // prevents infinite eviction loops.
+  //
+  // This works out so a register without a cascade number is allowed to evict
+  // anything, and it can be evicted by anything.
+  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+  if (!Cascade)
+    Cascade = NextCascade;
+
+  EvictionCost Cost;
+  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+    // If there are 10 or more interferences, chances are one is heavier.
+    if (Q.collectInterferingVRegs(10) >= 10)
+      return false;
+
+    // Check if any interfering live range is heavier than MaxWeight.
+    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+      assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
+             "Only expecting virtual register interference from query");
+      // Never evict spill products. They cannot split or spill.
+      if (getStage(*Intf) == RS_Done)
+        return false;
+      // Once a live range becomes small enough, it is urgent that we find a
+      // register for it. This is indicated by an infinite spill weight. These
+      // urgent live ranges get to evict almost anything.
+      //
+      // Also allow urgent evictions of unspillable ranges from a strictly
+      // larger allocation order.
+      bool Urgent = !VirtReg.isSpillable() &&
+        (Intf->isSpillable() ||
+         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
+         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
+      // Only evict older cascades or live ranges without a cascade.
+      unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+      if (Cascade <= IntfCascade) {
+        if (!Urgent)
+          return false;
+        // We permit breaking cascades for urgent evictions. It should be the
+        // last resort, though, so make it really expensive.
+        Cost.BrokenHints += 10;
+      }
+      // Would this break a satisfied hint?
+      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+      // Update eviction cost.
+      Cost.BrokenHints += BreaksHint;
+      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+      // Abort if this would be too expensive.
+      if (!(Cost < MaxCost))
+        return false;
+      // Finally, apply the eviction policy for non-urgent evictions.
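+      // (Editor's worked example of the cascade test above: if VirtReg
+      // carries Cascade 4, an interfering range with Cascade 4 or higher
+      // blocks the eviction unless it is Urgent, in which case the
+      // BrokenHints += 10 penalty makes breaking the cascade a last resort.)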
+      if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+        return false;
+    }
+  }
+  MaxCost = Cost;
+  return true;
+}
+
+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to PhysReg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  // Make sure that VirtReg has a cascade number, and assign that cascade
+  // number to every evicted register. These live ranges can then only be
+  // evicted by a newer cascade, preventing infinite loops.
+  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+  if (!Cascade)
+    Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+  DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+               << " interference: Cascade " << Cascade << '\n');
+
+  // Collect all interfering virtregs first.
+  SmallVector<LiveInterval*, 8> Intfs;
+  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+    assert(Q.seenAllInterferences() && "Didn't check all interferences.");
+    ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
+    Intfs.append(IVR.begin(), IVR.end());
+  }
+
+  // Evict them second. This will invalidate the queries.
+  for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+    LiveInterval *Intf = Intfs[i];
+    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
+    if (!VRM->hasPhys(Intf->reg))
+      continue;
+    Matrix->unassign(*Intf);
+    assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+            VirtReg.isSpillable() < Intf->isSpillable()) &&
+           "Cannot decrease cascade number, illegal eviction");
+    ExtraRegInfo[Intf->reg].Cascade = Cascade;
+    ++NumEvicted;
+    NewVRegs.push_back(Intf);
+  }
+}
+
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param  VirtReg Currently unassigned virtual register.
+/// @param  Order   Physregs to try.
+/// @return         Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
+                            AllocationOrder &Order,
+                            SmallVectorImpl<LiveInterval*> &NewVRegs,
+                            unsigned CostPerUseLimit) {
+  NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+
+  // Keep track of the cheapest interference seen so far.
+  EvictionCost BestCost(~0u);
+  unsigned BestPhys = 0;
+  unsigned OrderLimit = Order.getOrder().size();
+
+  // When we are just looking for a reduced cost per use, don't break any
+  // hints, and only evict smaller spill weights.
+  if (CostPerUseLimit < ~0u) {
+    BestCost.BrokenHints = 0;
+    BestCost.MaxWeight = VirtReg.weight;
+
+    // Check if any registers in RC are below CostPerUseLimit.
+    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
+    unsigned MinCost = RegClassInfo.getMinCost(RC);
+    if (MinCost >= CostPerUseLimit) {
+      DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost
+                   << ", no cheaper registers to be found.\n");
+      return 0;
+    }
+
+    // It is normal for register classes to have a long tail of registers with
+    // the same cost. We don't need to look at them if they're too expensive.
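+    // (Editor's sketch with invented costs: for an allocation order whose
+    // per-use costs are {0, 0, 1, 1, 1} and CostPerUseLimit == 1, the check
+    // below fires and OrderLimit shrinks to the point where the trailing
+    // run of cost-1 registers begins, so they are never visited.)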
+ if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) { + OrderLimit = RegClassInfo.getLastCostChange(RC); + DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); + } + } + + Order.rewind(); + while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) { + if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) + continue; + // The first use of a callee-saved register in a function has cost 1. + // Don't start using a CSR when the CostPerUseLimit is low. + if (CostPerUseLimit == 1) + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (!MRI->isPhysRegUsed(CSR)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " + << PrintReg(CSR, TRI) << '\n'); + continue; + } + + if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) + continue; + + // Best so far. + BestPhys = PhysReg; + + // Stop if the hint can be used. + if (Order.isHint()) + break; + } + + if (!BestPhys) + return 0; + + evictInterference(VirtReg, BestPhys, NewVRegs); + return BestPhys; +} + + +//===----------------------------------------------------------------------===// +// Region Splitting +//===----------------------------------------------------------------------===// + +/// addSplitConstraints - Fill out the SplitConstraints vector based on the +/// interference pattern in Physreg and its aliases. Add the constraints to +/// SpillPlacement and return the static cost of this split in Cost, assuming +/// that all preferences in SplitConstraints are met. +/// Return false if there are no bundles with positive bias. +bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, + float &Cost) { + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + + // Reset interference dependent info. + SplitConstraints.resize(UseBlocks.size()); + float StaticCost = 0; + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + + BC.Number = BI.MBB->getNumber(); + Intf.moveToBlock(BC.Number); + BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.ChangesValue = BI.FirstDef; + + if (!Intf.hasInterference()) + continue; + + // Number of spill code instructions to insert. + unsigned Ins = 0; + + // Interference for the live-in value. + if (BI.LiveIn) { + if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) + BC.Entry = SpillPlacement::MustSpill, ++Ins; + else if (Intf.first() < BI.FirstInstr) + BC.Entry = SpillPlacement::PrefSpill, ++Ins; + else if (Intf.first() < BI.LastInstr) + ++Ins; + } + + // Interference for the live-out value. + if (BI.LiveOut) { + if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) + BC.Exit = SpillPlacement::MustSpill, ++Ins; + else if (Intf.last() > BI.LastInstr) + BC.Exit = SpillPlacement::PrefSpill, ++Ins; + else if (Intf.last() > BI.FirstInstr) + ++Ins; + } + + // Accumulate the total frequency of inserted spill code. + if (Ins) + StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + } + Cost = StaticCost; + + // Add constraints for use-blocks. Note that these are the only constraints + // that may add a positive bias, it is downhill from here. + SpillPlacer->addConstraints(SplitConstraints); + return SpillPlacer->scanActiveBundles(); +} + + +/// addThroughConstraints - Add constraints and links to SpillPlacer from the +/// live-through blocks in Blocks. 
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, + ArrayRef<unsigned> Blocks) { + const unsigned GroupSize = 8; + SpillPlacement::BlockConstraint BCS[GroupSize]; + unsigned TBS[GroupSize]; + unsigned B = 0, T = 0; + + for (unsigned i = 0; i != Blocks.size(); ++i) { + unsigned Number = Blocks[i]; + Intf.moveToBlock(Number); + + if (!Intf.hasInterference()) { + assert(T < GroupSize && "Array overflow"); + TBS[T] = Number; + if (++T == GroupSize) { + SpillPlacer->addLinks(makeArrayRef(TBS, T)); + T = 0; + } + continue; + } + + assert(B < GroupSize && "Array overflow"); + BCS[B].Number = Number; + + // Interference for the live-in value. + if (Intf.first() <= Indexes->getMBBStartIdx(Number)) + BCS[B].Entry = SpillPlacement::MustSpill; + else + BCS[B].Entry = SpillPlacement::PrefSpill; + + // Interference for the live-out value. + if (Intf.last() >= SA->getLastSplitPoint(Number)) + BCS[B].Exit = SpillPlacement::MustSpill; + else + BCS[B].Exit = SpillPlacement::PrefSpill; + + if (++B == GroupSize) { + ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); + SpillPlacer->addConstraints(Array); + B = 0; + } + } + + ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); + SpillPlacer->addConstraints(Array); + SpillPlacer->addLinks(makeArrayRef(TBS, T)); +} + +void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { + // Keep track of through blocks that have not been added to SpillPlacer. + BitVector Todo = SA->getThroughBlocks(); + SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; + unsigned AddedTo = 0; +#ifndef NDEBUG + unsigned Visited = 0; +#endif + + for (;;) { + ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); + // Find new through blocks in the periphery of PrefRegBundles. + for (int i = 0, e = NewBundles.size(); i != e; ++i) { + unsigned Bundle = NewBundles[i]; + // Look at all blocks connected to Bundle in the full graph. + ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); + for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + unsigned Block = *I; + if (!Todo.test(Block)) + continue; + Todo.reset(Block); + // This is a new through block. Add it to SpillPlacer later. + ActiveBlocks.push_back(Block); +#ifndef NDEBUG + ++Visited; +#endif + } + } + // Any new blocks to add? + if (ActiveBlocks.size() == AddedTo) + break; + + // Compute through constraints from the interference, or assume that all + // through blocks prefer spilling when forming compact regions. + ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); + if (Cand.PhysReg) + addThroughConstraints(Cand.Intf, NewBlocks); + else + // Provide a strong negative bias on through blocks to prevent unwanted + // liveness on loop backedges. + SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); + AddedTo = ActiveBlocks.size(); + + // Perhaps iterating can enable more bundles? + SpillPlacer->iterate(); + } + DEBUG(dbgs() << ", v=" << Visited); +} + +/// calcCompactRegion - Compute the set of edge bundles that should be live +/// when splitting the current live range into compact regions. Compact +/// regions can be computed without looking at interference. They are the +/// regions formed by removing all the live-through blocks from the live range. +/// +/// Returns false if the current live range is already compact, or if the +/// compact regions would form single block regions anyway. +bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { + // Without any through blocks, the live range is already compact. 
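+  // (Editor's note: a "through" block here is one the range is live across
+  // without any uses, e.g. a block that a loop-carried value merely crosses;
+  // removing all of them is what leaves the compact regions.)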
+ if (!SA->getNumThroughBlocks()) + return false; + + // Compact regions don't correspond to any physreg. + Cand.reset(IntfCache, 0); + + DEBUG(dbgs() << "Compact region bundles"); + + // Use the spill placer to determine the live bundles. GrowRegion pretends + // that all the through blocks have interference when PhysReg is unset. + SpillPlacer->prepare(Cand.LiveBundles); + + // The static split cost will be zero since Cand.Intf reports no interference. + float Cost; + if (!addSplitConstraints(Cand.Intf, Cost)) { + DEBUG(dbgs() << ", none.\n"); + return false; + } + + growRegion(Cand); + SpillPlacer->finish(); + + if (!Cand.LiveBundles.any()) { + DEBUG(dbgs() << ", none.\n"); + return false; + } + + DEBUG({ + for (int i = Cand.LiveBundles.find_first(); i>=0; + i = Cand.LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + return true; +} + +/// calcSpillCost - Compute how expensive it would be to split the live range in +/// SA around all use blocks instead of forming bundle regions. +float RAGreedy::calcSpillCost() { + float Cost = 0; + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + unsigned Number = BI.MBB->getNumber(); + // We normally only need one spill instruction - a load or a store. + Cost += SpillPlacer->getBlockFrequency(Number); + + // Unless the value is redefined in the block. + if (BI.LiveIn && BI.LiveOut && BI.FirstDef) + Cost += SpillPlacer->getBlockFrequency(Number); + } + return Cost; +} + +/// calcGlobalSplitCost - Return the global split cost of following the split +/// pattern in LiveBundles. This cost should be added to the local cost of the +/// interference pattern in SplitConstraints. +/// +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { + float GlobalCost = 0; + const BitVector &LiveBundles = Cand.LiveBundles; + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; + unsigned Ins = 0; + + if (BI.LiveIn) + Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); + if (BI.LiveOut) + Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); + if (Ins) + GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + } + + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + if (!RegIn && !RegOut) + continue; + if (RegIn && RegOut) { + // We need double spill code if this block has interference. + Cand.Intf.moveToBlock(Number); + if (Cand.Intf.hasInterference()) + GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); + continue; + } + // live-in / stack-out or stack-in live-out. + GlobalCost += SpillPlacer->getBlockFrequency(Number); + } + return GlobalCost; +} + +/// splitAroundRegion - Split the current live range around the regions +/// determined by BundleCand and GlobalCand. +/// +/// Before calling this function, GlobalCand and BundleCand must be initialized +/// so each bundle is assigned to a valid candidate, or NoCand for the +/// stack-bound bundles. 
+/// The shared SA/SE SplitAnalysis and SplitEditor
+/// objects must be initialized for the current live range, and intervals
+/// created for the used candidates.
+///
+/// @param LREdit    The LiveRangeEdit object handling the current split.
+/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
+///                  must appear in this list.
+void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
+                                 ArrayRef<unsigned> UsedCands) {
+  // These are the intervals created for new global ranges. We may create more
+  // intervals for local ranges.
+  const unsigned NumGlobalIntvs = LREdit.size();
+  DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
+  assert(NumGlobalIntvs && "No global intervals configured");
+
+  // Isolate even single instructions when dealing with a proper sub-class.
+  // That guarantees register class inflation for the stack interval because it
+  // is all copies.
+  unsigned Reg = SA->getParent().reg;
+  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+
+  // First handle all the blocks with uses.
+  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+    unsigned Number = BI.MBB->getNumber();
+    unsigned IntvIn = 0, IntvOut = 0;
+    SlotIndex IntfIn, IntfOut;
+    if (BI.LiveIn) {
+      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+      if (CandIn != NoCand) {
+        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+        IntvIn = Cand.IntvIdx;
+        Cand.Intf.moveToBlock(Number);
+        IntfIn = Cand.Intf.first();
+      }
+    }
+    if (BI.LiveOut) {
+      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+      if (CandOut != NoCand) {
+        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+        IntvOut = Cand.IntvIdx;
+        Cand.Intf.moveToBlock(Number);
+        IntfOut = Cand.Intf.last();
+      }
+    }
+
+    // Create separate intervals for isolated blocks with multiple uses.
+    if (!IntvIn && !IntvOut) {
+      DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+        SE->splitSingleBlock(BI);
+      continue;
+    }
+
+    if (IntvIn && IntvOut)
+      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+    else if (IntvIn)
+      SE->splitRegInBlock(BI, IntvIn, IntfIn);
+    else
+      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
+  }
+
+  // Handle live-through blocks. The relevant live-through blocks are stored in
+  // the ActiveBlocks list with each candidate. We need to filter out
+  // duplicates.
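+  // (Editor's note: in the lookups below, Bundles->getBundle(N, 0) is
+  // assumed to name the edge bundle entering block N and getBundle(N, 1)
+  // the bundle leaving it, so BundleCand picks the candidate interval that
+  // should be live across each block boundary.)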
+ BitVector Todo = SA->getThroughBlocks(); + for (unsigned c = 0; c != UsedCands.size(); ++c) { + ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + unsigned Number = Blocks[i]; + if (!Todo.test(Number)) + continue; + Todo.reset(Number); + + unsigned IntvIn = 0, IntvOut = 0; + SlotIndex IntfIn, IntfOut; + + unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + if (CandIn != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandIn]; + IntvIn = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfIn = Cand.Intf.first(); + } + + unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + if (CandOut != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandOut]; + IntvOut = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfOut = Cand.Intf.last(); + } + if (!IntvIn && !IntvOut) + continue; + SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); + } + } + + ++NumGlobalSplits; + + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(Reg, LREdit.regs()); + + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + unsigned OrigBlocks = SA->getNumLiveBlocks(); + + // Sort out the new intervals created by splitting. We get four kinds: + // - Remainder intervals should not be split again. + // - Candidate intervals can be assigned to Cand.PhysReg. + // - Block-local splits are candidates for local splitting. + // - DCE leftovers should go back on the queue. + for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + LiveInterval &Reg = *LREdit.get(i); + + // Ignore old intervals from DCE. + if (getStage(Reg) != RS_New) + continue; + + // Remainder interval. Don't try splitting again, spill if it doesn't + // allocate. + if (IntvMap[i] == 0) { + setStage(Reg, RS_Spill); + continue; + } + + // Global intervals. Allow repeated splitting as long as the number of live + // blocks is strictly decreasing. + if (IntvMap[i] < NumGlobalIntvs) { + if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { + DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks + << " blocks as original.\n"); + // Don't allow repeated splitting as a safe guard against looping. + setStage(Reg, RS_Split2); + } + continue; + } + + // Other intervals are treated as new. This includes local intervals created + // for blocks with multiple uses, and anything created by DCE. + } + + if (VerifyEnabled) + MF->verify(this, "After splitting live range around region"); +} + +unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + unsigned NumCands = 0; + unsigned BestCand = NoCand; + float BestCost; + SmallVector<unsigned, 8> UsedCands; + + // Check if we can split this live range around a compact region. + bool HasCompact = calcCompactRegion(GlobalCand.front()); + if (HasCompact) { + // Yes, keep GlobalCand[0] as the compact region candidate. + NumCands = 1; + BestCost = HUGE_VALF; + } else { + // No benefit from the compact region, our fallback will be per-block + // splitting. Make sure we find a solution that is cheaper than spilling. + BestCost = Hysteresis * calcSpillCost(); + DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); + } + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + // Discard bad candidates before we run out of interference cache cursors. + // This will only affect register classes with a lot of registers (>32). 
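+    // (Editor's note: the eviction below drops the candidate with the
+    // fewest live bundles -- the least promising split -- while taking care
+    // never to drop the current best candidate.)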
+ if (NumCands == IntfCache.getMaxCursors()) { + unsigned WorstCount = ~0u; + unsigned Worst = 0; + for (unsigned i = 0; i != NumCands; ++i) { + if (i == BestCand || !GlobalCand[i].PhysReg) + continue; + unsigned Count = GlobalCand[i].LiveBundles.count(); + if (Count < WorstCount) + Worst = i, WorstCount = Count; + } + --NumCands; + GlobalCand[Worst] = GlobalCand[NumCands]; + if (BestCand == NumCands) + BestCand = Worst; + } + + if (GlobalCand.size() <= NumCands) + GlobalCand.resize(NumCands+1); + GlobalSplitCandidate &Cand = GlobalCand[NumCands]; + Cand.reset(IntfCache, PhysReg); + + SpillPlacer->prepare(Cand.LiveBundles); + float Cost; + if (!addSplitConstraints(Cand.Intf, Cost)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); + continue; + } + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost); + if (Cost >= BestCost) { + DEBUG({ + if (BestCand == NoCand) + dbgs() << " worse than no bundles\n"; + else + dbgs() << " worse than " + << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; + }); + continue; + } + growRegion(Cand); + + SpillPlacer->finish(); + + // No live bundles, defer to splitSingleBlocks(). + if (!Cand.LiveBundles.any()) { + DEBUG(dbgs() << " no bundles.\n"); + continue; + } + + Cost += calcGlobalSplitCost(Cand); + DEBUG({ + dbgs() << ", total = " << Cost << " with bundles"; + for (int i = Cand.LiveBundles.find_first(); i>=0; + i = Cand.LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + if (Cost < BestCost) { + BestCand = NumCands; + BestCost = Hysteresis * Cost; // Prevent rounding effects. + } + ++NumCands; + } + + // No solutions found, fall back to single block splitting. + if (!HasCompact && BestCand == NoCand) + return 0; + + // Prepare split editor. + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitSpillMode); + + // Assign all edge bundles to the preferred candidate, or NoCand. + BundleCand.assign(Bundles->getNumBundles(), NoCand); + + // Assign bundles for the best candidate region. + if (BestCand != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[BestCand]; + if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { + UsedCands.push_back(BestCand); + Cand.IntvIdx = SE->openIntv(); + DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in " + << B << " bundles, intv " << Cand.IntvIdx << ".\n"); + (void)B; + } + } + + // Assign bundles for the compact region. + if (HasCompact) { + GlobalSplitCandidate &Cand = GlobalCand.front(); + assert(!Cand.PhysReg && "Compact region has no physreg"); + if (unsigned B = Cand.getBundles(BundleCand, 0)) { + UsedCands.push_back(0); + Cand.IntvIdx = SE->openIntv(); + DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " + << Cand.IntvIdx << ".\n"); + (void)B; + } + } + + splitAroundRegion(LREdit, UsedCands); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Per-Block Splitting +//===----------------------------------------------------------------------===// + +/// tryBlockSplit - Split a global live range around every block with uses. This +/// creates a lot of local live ranges, that will be split by tryLocalSplit if +/// they don't allocate. 
+unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); + unsigned Reg = VirtReg.reg; + bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitSpillMode); + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) + SE->splitSingleBlock(BI); + } + // No blocks were split. + if (LREdit.empty()) + return 0; + + // We did split for some blocks. + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + + // Tell LiveDebugVariables about the new ranges. + DebugVars->splitRegister(Reg, LREdit.regs()); + + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + + // Sort out the new intervals created by splitting. The remainder interval + // goes straight to spilling, the new local ranges get to stay RS_New. + for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + LiveInterval &LI = *LREdit.get(i); + if (getStage(LI) == RS_New && IntvMap[i] == 0) + setStage(LI, RS_Spill); + } + + if (VerifyEnabled) + MF->verify(this, "After splitting live range around basic blocks"); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Per-Instruction Splitting +//===----------------------------------------------------------------------===// + +/// tryInstructionSplit - Split a live range around individual instructions. +/// This is normally not worthwhile since the spiller is doing essentially the +/// same thing. However, when the live range is in a constrained register +/// class, it may help to insert copies such that parts of the live range can +/// be moved to a larger register class. +/// +/// This is similar to spilling to a larger register class. +unsigned +RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + // There is no point to this if there are no larger sub-classes. + if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) + return 0; + + // Always enable split spill mode, since we're effectively spilling to a + // register. + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitEditor::SM_Size); + + ArrayRef<SlotIndex> Uses = SA->getUseSlots(); + if (Uses.size() <= 1) + return 0; + + DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); + + // Split around every non-copy instruction. + for (unsigned i = 0; i != Uses.size(); ++i) { + if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) + if (MI->isFullCopy()) { + DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); + continue; + } + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); + SE->useIntv(SegStart, SegStop); + } + + if (LREdit.empty()) { + DEBUG(dbgs() << "All uses were copies.\n"); + return 0; + } + + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + + // Assign all new registers to RS_Spill. This was the last chance. 
+ setStage(LREdit.begin(), LREdit.end(), RS_Spill); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Local Splitting +//===----------------------------------------------------------------------===// + + +/// calcGapWeights - Compute the maximum spill weight that needs to be evicted +/// in order to use PhysReg between two entries in SA->UseSlots. +/// +/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. +/// +void RAGreedy::calcGapWeights(unsigned PhysReg, + SmallVectorImpl<float> &GapWeight) { + assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); + ArrayRef<SlotIndex> Uses = SA->getUseSlots(); + const unsigned NumGaps = Uses.size()-1; + + // Start and end points for the interference check. + SlotIndex StartIdx = + BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; + SlotIndex StopIdx = + BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; + + GapWeight.assign(NumGaps, 0.0f); + + // Add interference from each overlapping register. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units) + .checkInterference()) + continue; + + // We know that VirtReg is a continuous interval from FirstInstr to + // LastInstr, so we don't need InterferenceQuery. + // + // Interference that overlaps an instruction is counted in both gaps + // surrounding the instruction. The exception is interference before + // StartIdx and after StopIdx. + // + LiveIntervalUnion::SegmentIter IntI = + Matrix->getLiveUnions()[*Units] .find(StartIdx); + for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { + // Skip the gaps before IntI. + while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + // Update the gaps covered by IntI. + const float weight = IntI.value()->weight; + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = std::max(GapWeight[Gap], weight); + if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) + break; + } + if (Gap == NumGaps) + break; + } + } + + // Add fixed interference. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + const LiveInterval &LI = LIS->getRegUnit(*Units); + LiveInterval::const_iterator I = LI.find(StartIdx); + LiveInterval::const_iterator E = LI.end(); + + // Same loop as above. Mark any overlapped gaps as HUGE_VALF. + for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { + while (Uses[Gap+1].getBoundaryIndex() < I->start) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = HUGE_VALF; + if (Uses[Gap+1].getBaseIndex() >= I->end) + break; + } + if (Gap == NumGaps) + break; + } + } +} + +/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only +/// basic block. +/// +unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); + + // Note that it is possible to have an interval that is live-in or live-out + // while only covering a single block - A phi-def can use undef values from + // predecessors, and the block could be a single-block loop. 
+  // We don't bother doing anything clever about such a case, we simply assume
+  // that the interval is continuous from FirstInstr to LastInstr. We should
+  // make sure that we don't do anything illegal to such an interval, though.
+
+  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+  if (Uses.size() <= 2)
+    return 0;
+  const unsigned NumGaps = Uses.size()-1;
+
+  DEBUG({
+    dbgs() << "tryLocalSplit: ";
+    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+      dbgs() << ' ' << Uses[i];
+    dbgs() << '\n';
+  });
+
+  // If VirtReg is live across any register mask operands, compute a list of
+  // gaps with register masks.
+  SmallVector<unsigned, 8> RegMaskGaps;
+  if (Matrix->checkRegMaskInterference(VirtReg)) {
+    // Get regmask slots for the whole block.
+    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
+    DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+    // Constrain to VirtReg's live range.
+    unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
+                                   Uses.front().getRegSlot()) - RMS.begin();
+    unsigned re = RMS.size();
+    for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
+      // Look for Uses[i] <= RMS <= Uses[i+1].
+      assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
+      if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+        continue;
+      // Skip a regmask on the same instruction as the last use. It doesn't
+      // overlap the live range.
+      if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+        break;
+      DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
+      RegMaskGaps.push_back(i);
+      // Advance ri to the next gap. A regmask on one of the uses counts in
+      // both gaps.
+      while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
+        ++ri;
+    }
+    DEBUG(dbgs() << '\n');
+  }
+
+  // Since we allow local split results to be split again, there is a risk of
+  // creating infinite loops. It is tempting to require that the new live
+  // ranges have fewer instructions than the original. That would guarantee
+  // convergence, but it is too strict. A live range with 3 instructions can be
+  // split 2+3 (including the COPY), and we want to allow that.
+  //
+  // Instead we use these rules:
+  //
+  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
+  //    noop split, of course).
+  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
+  //    the new ranges must have fewer instructions than before the split.
+  // 3. New ranges with the same number of instructions are marked RS_Split2,
+  //    smaller ranges are marked RS_New.
+  //
+  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
+  // excessive splitting and infinite loops.
+  //
+  bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
+
+  // Best split candidate.
+  unsigned BestBefore = NumGaps;
+  unsigned BestAfter = 0;
+  float BestDiff = 0;
+
+  const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
+  SmallVector<float, 8> GapWeight;
+
+  Order.rewind();
+  while (unsigned PhysReg = Order.next()) {
+    // Keep track of the largest spill weight that would need to be evicted in
+    // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+    calcGapWeights(PhysReg, GapWeight);
+
+    // Remove any gaps with regmask clobbers.
+    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
+      for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
+        GapWeight[RegMaskGaps[i]] = HUGE_VALF;
+
+    // Try to find the best sequence of gaps to close.
+    // The new spill weight must be larger than any gap interference.
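+    // (Editor's sketch: with uses u0 u1 u2 u3 there are NumGaps == 3 gaps;
+    // the loop below slides a window [SplitBefore, SplitAfter] over them,
+    // where MaxGap is the heaviest interference inside the window and the
+    // window is grown or shrunk until a profitable split is found.)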
+ + // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. + unsigned SplitBefore = 0, SplitAfter = 1; + + // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). + // It is the spill weight that needs to be evicted. + float MaxGap = GapWeight[0]; + + for (;;) { + // Live before/after split? + const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; + const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; + + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' ' + << Uses[SplitBefore] << '-' << Uses[SplitAfter] + << " i=" << MaxGap); + + // Stop before the interval gets so big we wouldn't be making progress. + if (!LiveBefore && !LiveAfter) { + DEBUG(dbgs() << " all\n"); + break; + } + // Should the interval be extended or shrunk? + bool Shrink = true; + + // How many gaps would the new range have? + unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter; + + // Legally, without causing looping? + bool Legal = !ProgressRequired || NewGaps < NumGaps; + + if (Legal && MaxGap < HUGE_VALF) { + // Estimate the new spill weight. Each instruction reads or writes the + // register. Conservatively assume there are no read-modify-write + // instructions. + // + // Try to guess the size of the new interval. + const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1), + Uses[SplitBefore].distance(Uses[SplitAfter]) + + (LiveBefore + LiveAfter)*SlotIndex::InstrDist); + // Would this split be possible to allocate? + // Never allocate all gaps, we wouldn't be making progress. + DEBUG(dbgs() << " w=" << EstWeight); + if (EstWeight * Hysteresis >= MaxGap) { + Shrink = false; + float Diff = EstWeight - MaxGap; + if (Diff > BestDiff) { + DEBUG(dbgs() << " (best)"); + BestDiff = Hysteresis * Diff; + BestBefore = SplitBefore; + BestAfter = SplitAfter; + } + } + } + + // Try to shrink. + if (Shrink) { + if (++SplitBefore < SplitAfter) { + DEBUG(dbgs() << " shrink\n"); + // Recompute the max when necessary. + if (GapWeight[SplitBefore - 1] >= MaxGap) { + MaxGap = GapWeight[SplitBefore]; + for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i) + MaxGap = std::max(MaxGap, GapWeight[i]); + } + continue; + } + MaxGap = 0; + } + + // Try to extend the interval. + if (SplitAfter >= NumGaps) { + DEBUG(dbgs() << " end\n"); + break; + } + + DEBUG(dbgs() << " extend\n"); + MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); + } + } + + // Didn't find any candidates? + if (BestBefore == NumGaps) + return 0; + + DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] + << '-' << Uses[BestAfter] << ", " << BestDiff + << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); + + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit); + + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); + SE->useIntv(SegStart, SegStop); + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + + // If the new range has the same number of instructions as before, mark it as + // RS_Split2 so the next split will be forced to make progress. Otherwise, + // leave the new intervals as RS_New so they can compete. 
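+  // (Editor's example, hypothetical numbers: for a live-in range with
+  // NumGaps == 3 split at BestBefore == 1, BestAfter == 3 and not live-out,
+  // NewGaps = 1 + 3 - 1 + 0 == NumGaps, so the new main interval is tagged
+  // RS_Split2 below.)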
+ bool LiveBefore = BestBefore != 0 || BI.LiveIn; + bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; + unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; + if (NewGaps >= NumGaps) { + DEBUG(dbgs() << "Tagging non-progress ranges: "); + assert(!ProgressRequired && "Didn't make progress when it was required."); + for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) + if (IntvMap[i] == 1) { + setStage(*LREdit.get(i), RS_Split2); + DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); + } + DEBUG(dbgs() << '\n'); + } + ++NumLocalSplits; + + return 0; +} + +//===----------------------------------------------------------------------===// +// Live Range Splitting +//===----------------------------------------------------------------------===// + +/// trySplit - Try to split VirtReg or one of its interferences, making it +/// assignable. +/// @return Physreg when VirtReg may be assigned and/or new NewVRegs. +unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*>&NewVRegs) { + // Ranges must be Split2 or less. + if (getStage(VirtReg) >= RS_Spill) + return 0; + + // Local intervals are handled separately. + if (LIS->intervalIsInOneMBB(VirtReg)) { + NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); + SA->analyze(&VirtReg); + unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + return tryInstructionSplit(VirtReg, Order, NewVRegs); + } + + NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); + + SA->analyze(&VirtReg); + + // FIXME: SplitAnalysis may repair broken live ranges coming from the + // coalescer. That may cause the range to become allocatable which means that + // tryRegionSplit won't be making progress. This check should be replaced with + // an assertion when the coalescer is fixed. + if (SA->didRepairRange()) { + // VirtReg has changed, so all cached queries are invalid. + Matrix->invalidateVirtRegs(); + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) + return PhysReg; + } + + // First try to split around a region spanning multiple blocks. RS_Split2 + // ranges already made dubious progress with region splitting, so they go + // straight to single block splitting. + if (getStage(VirtReg) < RS_Split2) { + unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + } + + // Then isolate blocks. + return tryBlockSplit(VirtReg, Order, NewVRegs); +} + + +//===----------------------------------------------------------------------===// +// Main Entry Point +//===----------------------------------------------------------------------===// + +unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + // First try assigning a free register. + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) + return PhysReg; + + LiveRangeStage Stage = getStage(VirtReg); + DEBUG(dbgs() << StageName[Stage] + << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); + + // Try to evict a less worthy live range, but only for ranges from the primary + // queue. The RS_Split ranges already failed to do this, and they should not + // get a second chance until they have been split. 
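+  // (Editor's recap of the stage machine this function drives: RS_New
+  // becomes RS_Assign on first dequeue; ranges that can neither assign nor
+  // evict wait as RS_Split, are then split -- with RS_Split2 guarding
+  // against splits that make no progress -- and finally spill, their
+  // products marked RS_Done.)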
+  if (Stage != RS_Split)
+    if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
+      return PhysReg;
+
+  assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+
+  // The first time we see a live range, don't try to split or spill.
+  // Wait until the second time, when all smaller ranges have been allocated.
+  // This gives a better picture of the interference to split around.
+  if (Stage < RS_Split) {
+    setStage(VirtReg, RS_Split);
+    DEBUG(dbgs() << "wait for second round\n");
+    NewVRegs.push_back(&VirtReg);
+    return 0;
+  }
+
+  // If we couldn't allocate a register from spilling, there is probably some
+  // invalid inline assembly. The base class will report it.
+  if (Stage >= RS_Done || !VirtReg.isSpillable())
+    return ~0u;
+
+  // Try splitting VirtReg or interferences.
+  unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+  if (PhysReg || !NewVRegs.empty())
+    return PhysReg;
+
+  // Finally spill VirtReg itself.
+  NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+  LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+  spiller().spill(LRE);
+  setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+
+  if (VerifyEnabled)
+    MF->verify(this, "After spilling");
+
+  // The live virtual register requesting allocation was spilled, so tell
+  // the caller not to allocate anything during this round.
+  return 0;
+}
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+  DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+               << "********** Function: " << mf.getName() << '\n');
+
+  MF = &mf;
+  if (VerifyEnabled)
+    MF->verify(this, "Before greedy register allocator");
+
+  RegAllocBase::init(getAnalysis<VirtRegMap>(),
+                     getAnalysis<LiveIntervals>(),
+                     getAnalysis<LiveRegMatrix>());
+  Indexes = &getAnalysis<SlotIndexes>();
+  DomTree = &getAnalysis<MachineDominatorTree>();
+  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+  Loops = &getAnalysis<MachineLoopInfo>();
+  Bundles = &getAnalysis<EdgeBundles>();
+  SpillPlacer = &getAnalysis<SpillPlacement>();
+  DebugVars = &getAnalysis<LiveDebugVariables>();
+
+  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
+  ExtraRegInfo.clear();
+  ExtraRegInfo.resize(MRI->getNumVirtRegs());
+  NextCascade = 1;
+  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
+  GlobalCand.resize(32);  // This will grow as needed.
+
+  allocatePhysRegs();
+  releaseMemory();
+  return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 0000000..607edac
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,638 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
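+//
+// (Editor's sketch of the problem being built, in standard PBQP notation:
+// each virtual register becomes a node i with an option vector x_i, and the
+// solver minimizes
+//
+//     sum_i x_i^T c_i  +  sum_{i<j} x_i^T C_ij x_j
+//
+// where option 0 of every cost vector c_i is "spill" and the matrices C_ij
+// hold infinite entries for pairs of aliasing physical registers, exactly as
+// constructed by PBQPBuilder below.)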
+// +// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned +// for register allocation. For more information on PBQP for register +// allocation, see the following papers: +// +// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with +// PBQP. In Proceedings of the 7th Joint Modular Languages Conference +// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361. +// +// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular +// architectures. In Proceedings of the Joint Conference on Languages, +// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York, +// NY, USA, 139-148. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" + +#include "llvm/CodeGen/RegAllocPBQP.h" +#include "RegisterCoalescer.h" +#include "Spiller.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PBQP/Graph.h" +#include "llvm/CodeGen/PBQP/HeuristicSolver.h" +#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include <limits> +#include <memory> +#include <set> +#include <sstream> +#include <vector> + +using namespace llvm; + +static RegisterRegAlloc +registerPBQPRepAlloc("pbqp", "PBQP register allocator", + createDefaultPBQPRegisterAllocator); + +static cl::opt<bool> +pbqpCoalescing("pbqp-coalescing", + cl::desc("Attempt coalescing during PBQP register allocation."), + cl::init(false), cl::Hidden); + +#ifndef NDEBUG +static cl::opt<bool> +pbqpDumpGraphs("pbqp-dump-graphs", + cl::desc("Dump graphs for each function/round in the compilation unit."), + cl::init(false), cl::Hidden); +#endif + +namespace { + +/// +/// PBQP based allocators solve the register allocation problem by mapping +/// register allocation problems to Partitioned Boolean Quadratic +/// Programming problems. +class RegAllocPBQP : public MachineFunctionPass { +public: + + static char ID; + + /// Construct a PBQP register allocator. + RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0) + : MachineFunctionPass(ID), builder(b), customPassID(cPassID) { + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + } + + /// Return the pass name. + virtual const char* getPassName() const { + return "PBQP Register Allocator"; + } + + /// PBQP analysis usage. 
+ virtual void getAnalysisUsage(AnalysisUsage &au) const; + + /// Perform register allocation + virtual bool runOnMachineFunction(MachineFunction &MF); + +private: + + typedef std::map<const LiveInterval*, unsigned> LI2NodeMap; + typedef std::vector<const LiveInterval*> Node2LIMap; + typedef std::vector<unsigned> AllowedSet; + typedef std::vector<AllowedSet> AllowedSetMap; + typedef std::pair<unsigned, unsigned> RegPair; + typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; + typedef std::set<unsigned> RegSet; + + + std::auto_ptr<PBQPBuilder> builder; + + char *customPassID; + + MachineFunction *mf; + const TargetMachine *tm; + const TargetRegisterInfo *tri; + const TargetInstrInfo *tii; + const MachineLoopInfo *loopInfo; + MachineRegisterInfo *mri; + + std::auto_ptr<Spiller> spiller; + LiveIntervals *lis; + LiveStacks *lss; + VirtRegMap *vrm; + + RegSet vregsToAlloc, emptyIntervalVRegs; + + /// \brief Finds the initial set of vreg intervals to allocate. + void findVRegIntervalsToAlloc(); + + /// \brief Given a solved PBQP problem maps this solution back to a register + /// assignment. + bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, + const PBQP::Solution &solution); + + /// \brief Postprocessing before final spilling. Sets basic block "live in" + /// variables. + void finalizeAlloc() const; + +}; + +char RegAllocPBQP::ID = 0; + +} // End anonymous namespace. + +unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const { + Node2VReg::const_iterator vregItr = node2VReg.find(node); + assert(vregItr != node2VReg.end() && "No vreg for node."); + return vregItr->second; +} + +PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const { + VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); + assert(nodeItr != vreg2Node.end() && "No node for vreg."); + return nodeItr->second; + +} + +const PBQPRAProblem::AllowedSet& + PBQPRAProblem::getAllowedSet(unsigned vreg) const { + AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg); + assert(allowedSetItr != allowedSets.end() && "No pregs for vreg."); + const AllowedSet &allowedSet = allowedSetItr->second; + return allowedSet; +} + +unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { + assert(isPRegOption(vreg, option) && "Not a preg option."); + + const AllowedSet& allowedSet = getAllowedSet(vreg); + assert(option <= allowedSet.size() && "Option outside allowed set."); + return allowedSet[option - 1]; +} + +std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, + const LiveIntervals *lis, + const MachineLoopInfo *loopInfo, + const RegSet &vregs) { + + LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); + MachineRegisterInfo *mri = &mf->getRegInfo(); + const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); + + std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem()); + PBQP::Graph &g = p->getGraph(); + RegSet pregs; + + // Collect the set of preg intervals, record that they're used in the MF. + for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) { + if (mri->def_empty(Reg)) + continue; + pregs.insert(Reg); + mri->setPhysRegUsed(Reg); + } + + // Iterate over vregs. + for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); + vregItr != vregEnd; ++vregItr) { + unsigned vreg = *vregItr; + const TargetRegisterClass *trc = mri->getRegClass(vreg); + LiveInterval *vregLI = &LIS->getInterval(vreg); + + // Record any overlaps with regmask operands. 
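+    // (checkRegMaskInterference leaves the vector empty when vregLI crosses
+    // no regmask operands; otherwise a clear bit means the corresponding
+    // physreg is clobbered by some regmask, e.g. across a call.)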
+ BitVector regMaskOverlaps; + LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps); + + // Compute an initial allowed set for the current vreg. + typedef std::vector<unsigned> VRAllowed; + VRAllowed vrAllowed; + ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf); + for (unsigned i = 0; i != rawOrder.size(); ++i) { + unsigned preg = rawOrder[i]; + if (mri->isReserved(preg)) + continue; + + // vregLI crosses a regmask operand that clobbers preg. + if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg)) + continue; + + // vregLI overlaps fixed regunit interference. + bool Interference = false; + for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) { + if (vregLI->overlaps(LIS->getRegUnit(*Units))) { + Interference = true; + break; + } + } + if (Interference) + continue; + + // preg is usable for this virtual register. + vrAllowed.push_back(preg); + } + + // Construct the node. + PBQP::Graph::NodeItr node = + g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); + + // Record the mapping and allowed set in the problem. + p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end()); + + PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ? + vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min(); + + addSpillCosts(g.getNodeCosts(node), spillCost); + } + + for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end(); + vr1Itr != vrEnd; ++vr1Itr) { + unsigned vr1 = *vr1Itr; + const LiveInterval &l1 = lis->getInterval(vr1); + const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1); + + for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr); + vr2Itr != vrEnd; ++vr2Itr) { + unsigned vr2 = *vr2Itr; + const LiveInterval &l2 = lis->getInterval(vr2); + const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2); + + assert(!l2.empty() && "Empty interval in vreg set?"); + if (l1.overlaps(l2)) { + PBQP::Graph::EdgeItr edge = + g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), + PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0)); + + addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri); + } + } + } + + return p; +} + +void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec, + PBQP::PBQPNum spillCost) { + costVec[0] = spillCost; +} + +void PBQPBuilder::addInterferenceCosts( + PBQP::Matrix &costMat, + const PBQPRAProblem::AllowedSet &vr1Allowed, + const PBQPRAProblem::AllowedSet &vr2Allowed, + const TargetRegisterInfo *tri) { + assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch."); + assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch."); + + for (unsigned i = 0; i != vr1Allowed.size(); ++i) { + unsigned preg1 = vr1Allowed[i]; + + for (unsigned j = 0; j != vr2Allowed.size(); ++j) { + unsigned preg2 = vr2Allowed[j]; + + if (tri->regsOverlap(preg1, preg2)) { + costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity(); + } + } + } +} + +std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( + MachineFunction *mf, + const LiveIntervals *lis, + const MachineLoopInfo *loopInfo, + const RegSet &vregs) { + + std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs); + PBQP::Graph &g = p->getGraph(); + + const TargetMachine &tm = mf->getTarget(); + CoalescerPair cp(*tm.getRegisterInfo()); + + // Scan the machine function and add a coalescing cost whenever CoalescerPair + // gives the Ok. 
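+  // As an illustrative sketch of the numbers involved: a copy "%b = COPY %a"
+  // at loop depth d contributes roughly -copyFactor * getSpillWeight(false,
+  // true, d) to every edge entry where %a and %b select the same physreg (or
+  // to the matching physreg option of the virtual side when the other side
+  // is physical), so the solver is nudged toward making the copy a no-op.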
+ for (MachineFunction::const_iterator mbbItr = mf->begin(), + mbbEnd = mf->end(); + mbbItr != mbbEnd; ++mbbItr) { + const MachineBasicBlock *mbb = &*mbbItr; + + for (MachineBasicBlock::const_iterator miItr = mbb->begin(), + miEnd = mbb->end(); + miItr != miEnd; ++miItr) { + const MachineInstr *mi = &*miItr; + + if (!cp.setRegisters(mi)) { + continue; // Not coalescable. + } + + if (cp.getSrcReg() == cp.getDstReg()) { + continue; // Already coalesced. + } + + unsigned dst = cp.getDstReg(), + src = cp.getSrcReg(); + + const float copyFactor = 0.5; // Cost of copy relative to load. Current + // value plucked randomly out of the air. + + PBQP::PBQPNum cBenefit = + copyFactor * LiveIntervals::getSpillWeight(false, true, + loopInfo->getLoopDepth(mbb)); + + if (cp.isPhys()) { + if (!mf->getRegInfo().isAllocatable(dst)) { + continue; + } + + const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src); + unsigned pregOpt = 0; + while (pregOpt < allowed.size() && allowed[pregOpt] != dst) { + ++pregOpt; + } + if (pregOpt < allowed.size()) { + ++pregOpt; // +1 to account for spill option. + PBQP::Graph::NodeItr node = p->getNodeForVReg(src); + addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit); + } + } else { + const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); + const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); + PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst); + PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src); + PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2); + if (edge == g.edgesEnd()) { + edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1, + allowed2->size() + 1, + 0)); + } else { + if (g.getEdgeNode1(edge) == node2) { + std::swap(node1, node2); + std::swap(allowed1, allowed2); + } + } + + addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2, + cBenefit); + } + } + } + + return p; +} + +void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, + unsigned pregOption, + PBQP::PBQPNum benefit) { + costVec[pregOption] += -benefit; +} + +void PBQPBuilderWithCoalescing::addVirtRegCoalesce( + PBQP::Matrix &costMat, + const PBQPRAProblem::AllowedSet &vr1Allowed, + const PBQPRAProblem::AllowedSet &vr2Allowed, + PBQP::PBQPNum benefit) { + + assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch."); + assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch."); + + for (unsigned i = 0; i != vr1Allowed.size(); ++i) { + unsigned preg1 = vr1Allowed[i]; + for (unsigned j = 0; j != vr2Allowed.size(); ++j) { + unsigned preg2 = vr2Allowed[j]; + + if (preg1 == preg2) { + costMat[i + 1][j + 1] += -benefit; + } + } + } +} + + +void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { + au.setPreservesCFG(); + au.addRequired<AliasAnalysis>(); + au.addPreserved<AliasAnalysis>(); + au.addRequired<SlotIndexes>(); + au.addPreserved<SlotIndexes>(); + au.addRequired<LiveIntervals>(); + au.addPreserved<LiveIntervals>(); + //au.addRequiredID(SplitCriticalEdgesID); + if (customPassID) + au.addRequiredID(*customPassID); + au.addRequired<CalculateSpillWeights>(); + au.addRequired<LiveStacks>(); + au.addPreserved<LiveStacks>(); + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); + au.addRequired<MachineLoopInfo>(); + au.addPreserved<MachineLoopInfo>(); + au.addRequired<VirtRegMap>(); + au.addPreserved<VirtRegMap>(); + MachineFunctionPass::getAnalysisUsage(au); +} + +void RegAllocPBQP::findVRegIntervalsToAlloc() { + + // Iterate over all live ranges. 
+ for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (mri->reg_nodbg_empty(Reg)) + continue; + LiveInterval *li = &lis->getInterval(Reg); + + // If this live interval is non-empty we will use pbqp to allocate it. + // Empty intervals we allocate in a simple post-processing stage in + // finalizeAlloc. + if (!li->empty()) { + vregsToAlloc.insert(li->reg); + } else { + emptyIntervalVRegs.insert(li->reg); + } + } +} + +bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, + const PBQP::Solution &solution) { + // Set to true if we have any spills + bool anotherRoundNeeded = false; + + // Clear the existing allocation. + vrm->clearAllVirt(); + + const PBQP::Graph &g = problem.getGraph(); + // Iterate over the nodes mapping the PBQP solution to a register + // assignment. + for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(), + nodeEnd = g.nodesEnd(); + node != nodeEnd; ++node) { + unsigned vreg = problem.getVRegForNode(node); + unsigned alloc = solution.getSelection(node); + + if (problem.isPRegOption(vreg, alloc)) { + unsigned preg = problem.getPRegForOption(vreg, alloc); + DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> " + << tri->getName(preg) << "\n"); + assert(preg != 0 && "Invalid preg selected."); + vrm->assignVirt2Phys(vreg, preg); + } else if (problem.isSpillOption(vreg, alloc)) { + vregsToAlloc.erase(vreg); + SmallVector<LiveInterval*, 8> newSpills; + LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm); + spiller->spill(LRE); + + DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: " + << LRE.getParent().weight << ", New vregs: "); + + // Copy any newly inserted live intervals into the list of regs to + // allocate. + for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); + itr != end; ++itr) { + assert(!(*itr)->empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " "); + vregsToAlloc.insert((*itr)->reg); + } + + DEBUG(dbgs() << ")\n"); + + // We need another round if spill intervals were added. + anotherRoundNeeded |= !LRE.empty(); + } else { + llvm_unreachable("Unknown allocation option."); + } + } + + return !anotherRoundNeeded; +} + + +void RegAllocPBQP::finalizeAlloc() const { + // First allocate registers for the empty intervals. + for (RegSet::const_iterator + itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end(); + itr != end; ++itr) { + LiveInterval *li = &lis->getInterval(*itr); + + unsigned physReg = mri->getSimpleHint(li->reg); + + if (physReg == 0) { + const TargetRegisterClass *liRC = mri->getRegClass(li->reg); + physReg = liRC->getRawAllocationOrder(*mf).front(); + } + + vrm->assignVirt2Phys(li->reg, physReg); + } +} + +bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { + + mf = &MF; + tm = &mf->getTarget(); + tri = tm->getRegisterInfo(); + tii = tm->getInstrInfo(); + mri = &mf->getRegInfo(); + + lis = &getAnalysis<LiveIntervals>(); + lss = &getAnalysis<LiveStacks>(); + loopInfo = &getAnalysis<MachineLoopInfo>(); + + vrm = &getAnalysis<VirtRegMap>(); + spiller.reset(createInlineSpiller(*this, MF, *vrm)); + + mri->freezeReservedRegs(MF); + + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n"); + + // Allocator main loop: + // + // * Map current regalloc problem to a PBQP problem + // * Solve the PBQP problem + // * Map the solution back to a register allocation + // * Spill if necessary + // + // This process is continued till no more spills are generated. 
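+  //
+  // Note that each round rebuilds the PBQP problem from scratch: spilling
+  // replaces one long interval with several short ones, which changes both
+  // the node set and the interference edges, so the previous solution
+  // cannot simply be patched up.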
+
+  // Find the vreg intervals in need of allocation.
+  findVRegIntervalsToAlloc();
+
+#ifndef NDEBUG
+  const Function* func = mf->getFunction();
+  std::string fqn =
+    func->getParent()->getModuleIdentifier() + "." +
+    func->getName().str();
+#endif
+
+  // If there are non-empty intervals, allocate them using PBQP.
+  if (!vregsToAlloc.empty()) {
+
+    bool pbqpAllocComplete = false;
+    unsigned round = 0;
+
+    while (!pbqpAllocComplete) {
+      DEBUG(dbgs() << "  PBQP Regalloc round " << round << ":\n");
+
+      std::auto_ptr<PBQPRAProblem> problem =
+        builder->build(mf, lis, loopInfo, vregsToAlloc);
+
+#ifndef NDEBUG
+      if (pbqpDumpGraphs) {
+        std::ostringstream rs;
+        rs << round;
+        std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph");
+        std::string tmp;
+        raw_fd_ostream os(graphFileName.c_str(), tmp);
+        DEBUG(dbgs() << "Dumping graph for round " << round << " to \""
+              << graphFileName << "\"\n");
+        problem->getGraph().dump(os);
+      }
+#endif
+
+      PBQP::Solution solution =
+        PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
+          problem->getGraph());
+
+      pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
+
+      ++round;
+    }
+  }
+
+  // Finalise allocation, allocate empty ranges.
+  finalizeAlloc();
+  vregsToAlloc.clear();
+  emptyIntervalVRegs.clear();
+
+  DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+
+  return true;
+}
+
+FunctionPass* llvm::createPBQPRegisterAllocator(
+                                           std::auto_ptr<PBQPBuilder> builder,
+                                           char *customPassID) {
+  return new RegAllocPBQP(builder, customPassID);
+}
+
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+  if (pbqpCoalescing) {
+    return createPBQPRegisterAllocator(
+             std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
+  } // else
+  return createPBQPRegisterAllocator(
+           std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+}
+
+#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
new file mode 100644
index 0000000..87382d8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -0,0 +1,146 @@
+//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee saved and reserved
+// registers depend on calling conventions and other dynamic information, so
+// some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
+         cl::desc("Limit all regclasses to N registers"));
+
+RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0)
+{}
+
+void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
+  bool Update = false;
+  MF = &mf;
+
+  // Allocate new array the first time we see a new target.
+ if (MF->getTarget().getRegisterInfo() != TRI) { + TRI = MF->getTarget().getRegisterInfo(); + RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); + Update = true; + } + + // Does this MF have different CSRs? + const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); + if (Update || CSR != CalleeSaved) { + // Build a CSRNum map. Every CSR alias gets an entry pointing to the last + // overlapping CSR. + CSRNum.clear(); + CSRNum.resize(TRI->getNumRegs(), 0); + for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... + Update = true; + } + CalleeSaved = CSR; + + // Different reserved registers? + const BitVector &RR = MF->getRegInfo().getReservedRegs(); + if (Reserved.size() != RR.size() || RR != Reserved) { + Update = true; + Reserved = RR; + } + + // Invalidate cached information from previous function. + if (Update) + ++Tag; +} + +/// compute - Compute the preferred allocation order for RC with reserved +/// registers filtered out. Volatile registers come first followed by CSR +/// aliases ordered according to the CSR order specified by the target. +void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { + RCInfo &RCI = RegClass[RC->getID()]; + + // Raw register count, including all reserved regs. + unsigned NumRegs = RC->getNumRegs(); + + if (!RCI.Order) + RCI.Order.reset(new MCPhysReg[NumRegs]); + + unsigned N = 0; + SmallVector<MCPhysReg, 16> CSRAlias; + unsigned MinCost = 0xff; + unsigned LastCost = ~0u; + unsigned LastCostChange = 0; + + // FIXME: Once targets reserve registers instead of removing them from the + // allocation order, we can simply use begin/end here. + ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF); + for (unsigned i = 0; i != RawOrder.size(); ++i) { + unsigned PhysReg = RawOrder[i]; + // Remove reserved registers from the allocation order. + if (Reserved.test(PhysReg)) + continue; + unsigned Cost = TRI->getCostPerUse(PhysReg); + MinCost = std::min(MinCost, Cost); + + if (CSRNum[PhysReg]) + // PhysReg aliases a CSR, save it for later. + CSRAlias.push_back(PhysReg); + else { + if (Cost != LastCost) + LastCostChange = N; + RCI.Order[N++] = PhysReg; + LastCost = Cost; + } + } + RCI.NumRegs = N + CSRAlias.size(); + assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass"); + + // CSR aliases go after the volatile registers, preserve the target's order. + for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) { + unsigned PhysReg = CSRAlias[i]; + unsigned Cost = TRI->getCostPerUse(PhysReg); + if (Cost != LastCost) + LastCostChange = N; + RCI.Order[N++] = PhysReg; + LastCost = Cost; + } + + // Register allocator stress test. Clip register class to N registers. + if (StressRA && RCI.NumRegs > StressRA) + RCI.NumRegs = StressRA; + + // Check if RC is a proper sub-class. + if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC)) + if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) + RCI.ProperSubClass = true; + + RCI.MinCost = uint8_t(MinCost); + RCI.LastCostChange = LastCostChange; + + DEBUG({ + dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; + for (unsigned I = 0; I != RCI.NumRegs; ++I) + dbgs() << ' ' << PrintReg(RCI.Order[I], TRI); + dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n"); + }); + + // RCI is now up-to-date. 
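+  // (Tag acts as a cheap epoch counter: runOnMachineFunction bumps it when
+  // the callee-saved or reserved register sets change, and stale per-class
+  // entries are recomputed lazily when their stored tag no longer matches.)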
+  RCI.Tag = Tag;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 0000000..d85646d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,2193 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface, which serves
+// as the common interface for all clients and implementations of register
+// coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+STATISTIC(numJoins    , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends  , "Number of copies extended");
+STATISTIC(NumReMats   , "Number of instructions re-materialized");
+STATISTIC(NumInflated , "Number of register classes inflated");
+STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
+STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
+
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+              cl::desc("Coalesce copies (default=true)"),
+              cl::init(true));
+
+// Temporary flag to test critical edge unsplitting.
+static cl::opt<bool>
+EnableJoinSplits("join-splitedges",
+  cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
+
+// Temporary flag to test global copy optimization.
+static cl::opt<cl::boolOrDefault> +EnableGlobalCopies("join-globalcopies", + cl::desc("Coalesce copies that span blocks (default=subtarget)"), + cl::init(cl::BOU_UNSET), cl::Hidden); + +static cl::opt<bool> +VerifyCoalescing("verify-coalescing", + cl::desc("Verify machine instrs before and after register coalescing"), + cl::Hidden); + +namespace { + class RegisterCoalescer : public MachineFunctionPass, + private LiveRangeEdit::Delegate { + MachineFunction* MF; + MachineRegisterInfo* MRI; + const TargetMachine* TM; + const TargetRegisterInfo* TRI; + const TargetInstrInfo* TII; + LiveIntervals *LIS; + const MachineLoopInfo* Loops; + AliasAnalysis *AA; + RegisterClassInfo RegClassInfo; + + /// \brief True if the coalescer should aggressively coalesce global copies + /// in favor of keeping local copies. + bool JoinGlobalCopies; + + /// \brief True if the coalescer should aggressively coalesce fall-thru + /// blocks exclusively containing copies. + bool JoinSplitEdges; + + /// WorkList - Copy instructions yet to be coalesced. + SmallVector<MachineInstr*, 8> WorkList; + SmallVector<MachineInstr*, 8> LocalWorkList; + + /// ErasedInstrs - Set of instruction pointers that have been erased, and + /// that may be present in WorkList. + SmallPtrSet<MachineInstr*, 8> ErasedInstrs; + + /// Dead instructions that are about to be deleted. + SmallVector<MachineInstr*, 8> DeadDefs; + + /// Virtual registers to be considered for register class inflation. + SmallVector<unsigned, 8> InflateRegs; + + /// Recursively eliminate dead defs in DeadDefs. + void eliminateDeadDefs(); + + /// LiveRangeEdit callback. + void LRE_WillEraseInstruction(MachineInstr *MI); + + /// coalesceLocals - coalesce the LocalWorkList. + void coalesceLocals(); + + /// joinAllIntervals - join compatible live intervals + void joinAllIntervals(); + + /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting + /// copies that cannot yet be coalesced into WorkList. + void copyCoalesceInMBB(MachineBasicBlock *MBB); + + /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return + /// true if any progress was made. + bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); + + /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, + /// which are the src/dst of the copy instruction CopyMI. This returns + /// true if the copy was successfully coalesced away. If it is not + /// currently possible to coalesce this interval, but it may be possible if + /// other things get coalesced, then it returns true by reference in + /// 'Again'. + bool joinCopy(MachineInstr *TheCopy, bool &Again); + + /// joinIntervals - Attempt to join these two intervals. On failure, this + /// returns false. The output "SrcInt" will not have been modified, so we + /// can use this information below to update aliases. + bool joinIntervals(CoalescerPair &CP); + + /// Attempt joining two virtual registers. Return true on success. + bool joinVirtRegs(CoalescerPair &CP); + + /// Attempt joining with a reserved physreg. + bool joinReservedPhysReg(CoalescerPair &CP); + + /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If + /// the source value number is defined by a copy from the destination reg + /// see if we can merge these two destination reg valno# into a single + /// value number, eliminating a copy. 
+    bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+    /// hasOtherReachingDefs - Return true if there are definitions of IntB
+    /// other than BValNo val# that can reach uses of AValno val# of IntA.
+    bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+                              VNInfo *AValNo, VNInfo *BValNo);
+
+    /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy.
+    /// If the source value number is defined by a commutable instruction and
+    /// its other operand is coalesced to the copy dest register, see if we
+    /// can transform the copy into a noop by commuting the definition.
+    bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+
+    /// reMaterializeTrivialDef - If the source of a copy is defined by a
+    /// trivial computation, replace the copy by rematerializing the
+    /// definition.
+    bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI);
+
+    /// canJoinPhys - Return true if a physreg copy should be joined.
+    bool canJoinPhys(const CoalescerPair &CP);
+
+    /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+    /// update the subregister number if it is not zero. If DstReg is a
+    /// physical register and the existing subregister number of the def / use
+    /// being updated is not zero, make sure to set it to the correct physical
+    /// subregister.
+    void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+
+    /// eliminateUndefCopy - Handle copies of undef values.
+    bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
+
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    RegisterCoalescer() : MachineFunctionPass(ID) {
+      initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    virtual void releaseMemory();
+
+    /// runOnMachineFunction - pass entry point
+    virtual bool runOnMachineFunction(MachineFunction&);
+
+    /// print - Implement the dump method.
+    virtual void print(raw_ostream &O, const Module* = 0) const;
+  };
+} // end anonymous namespace
+
+char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
+
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
+                      "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
+                    "Simple Register Coalescing", false, false)
+
+char RegisterCoalescer::ID = 0;
+
+static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
+                        unsigned &Src, unsigned &Dst,
+                        unsigned &SrcSub, unsigned &DstSub) {
+  if (MI->isCopy()) {
+    Dst = MI->getOperand(0).getReg();
+    DstSub = MI->getOperand(0).getSubReg();
+    Src = MI->getOperand(1).getReg();
+    SrcSub = MI->getOperand(1).getSubReg();
+  } else if (MI->isSubregToReg()) {
+    Dst = MI->getOperand(0).getReg();
+    DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
+                                      MI->getOperand(3).getImm());
+    Src = MI->getOperand(2).getReg();
+    SrcSub = MI->getOperand(2).getSubReg();
+  } else
+    return false;
+  return true;
+}
+
+// Return true if this block should be vacated by the coalescer to eliminate
+// branches. The important cases to handle in the coalescer are critical edges
+// split during phi elimination which contain only copies.
Simple blocks that +// contain non-branches should also be vacated, but this can be handled by an +// earlier pass similar to early if-conversion. +static bool isSplitEdge(const MachineBasicBlock *MBB) { + if (MBB->pred_size() != 1 || MBB->succ_size() != 1) + return false; + + for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end(); + MII != E; ++MII) { + if (!MII->isCopyLike() && !MII->isUnconditionalBranch()) + return false; + } + return true; +} + +bool CoalescerPair::setRegisters(const MachineInstr *MI) { + SrcReg = DstReg = 0; + SrcIdx = DstIdx = 0; + NewRC = 0; + Flipped = CrossClass = false; + + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) + return false; + Partial = SrcSub || DstSub; + + // If one register is a physreg, it must be Dst. + if (TargetRegisterInfo::isPhysicalRegister(Src)) { + if (TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + Flipped = true; + } + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + // Eliminate DstSub on a physreg. + if (DstSub) { + Dst = TRI.getSubReg(Dst, DstSub); + if (!Dst) return false; + DstSub = 0; + } + + // Eliminate SrcSub by picking a corresponding Dst superregister. + if (SrcSub) { + Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); + if (!Dst) return false; + SrcSub = 0; + } else if (!MRI.getRegClass(Src)->contains(Dst)) { + return false; + } + } else { + // Both registers are virtual. + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + + // Both registers have subreg indices. + if (SrcSub && DstSub) { + // Copies between different sub-registers are never coalescable. + if (Src == Dst && SrcSub != DstSub) + return false; + + NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub, + SrcIdx, DstIdx); + if (!NewRC) + return false; + } else if (DstSub) { + // SrcReg will be merged with a sub-register of DstReg. + SrcIdx = DstSub; + NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); + } else if (SrcSub) { + // DstReg will be merged with a sub-register of SrcReg. + DstIdx = SrcSub; + NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub); + } else { + // This is a straight copy without sub-registers. + NewRC = TRI.getCommonSubClass(DstRC, SrcRC); + } + + // The combined constraint may be impossible to satisfy. + if (!NewRC) + return false; + + // Prefer SrcReg to be a sub-register of DstReg. + // FIXME: Coalescer should support subregs symmetrically. + if (DstIdx && !SrcIdx) { + std::swap(Src, Dst); + std::swap(SrcIdx, DstIdx); + Flipped = !Flipped; + } + + CrossClass = NewRC != DstRC || NewRC != SrcRC; + } + // Check our invariants + assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); + assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && + "Cannot have a physical SubIdx"); + SrcReg = Src; + DstReg = Dst; + return true; +} + +bool CoalescerPair::flip() { + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + return false; + std::swap(SrcReg, DstReg); + std::swap(SrcIdx, DstIdx); + Flipped = !Flipped; + return true; +} + +bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { + if (!MI) + return false; + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) + return false; + + // Find the virtual register that is SrcReg. 
+  if (Dst == SrcReg) {
+    std::swap(Src, Dst);
+    std::swap(SrcSub, DstSub);
+  } else if (Src != SrcReg) {
+    return false;
+  }
+
+  // Now check that Dst matches DstReg.
+  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+    if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+      return false;
+    assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
+    // DstSub could be set for a physreg from INSERT_SUBREG.
+    if (DstSub)
+      Dst = TRI.getSubReg(Dst, DstSub);
+    // Full copy of Src.
+    if (!SrcSub)
+      return DstReg == Dst;
+    // This is a partial register copy. Check that the parts match.
+    return TRI.getSubReg(DstReg, SrcSub) == Dst;
+  } else {
+    // DstReg is virtual.
+    if (DstReg != Dst)
+      return false;
+    // Registers match, do the subregisters line up?
+    return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
+           TRI.composeSubRegIndices(DstIdx, DstSub);
+  }
+}
+
+void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<AliasAnalysis>();
+  AU.addRequired<LiveIntervals>();
+  AU.addPreserved<LiveIntervals>();
+  AU.addPreserved<SlotIndexes>();
+  AU.addRequired<MachineLoopInfo>();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreservedID(MachineDominatorsID);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RegisterCoalescer::eliminateDeadDefs() {
+  SmallVector<LiveInterval*, 8> NewRegs;
+  LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs);
+}
+
+// Callback from eliminateDeadDefs().
+void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
+  // MI may be in WorkList. Make sure we don't visit it.
+  ErasedInstrs.insert(MI);
+}
+
+/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+///  A3 = B0
+///    ...
+///  B1 = A3      <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
+                                             MachineInstr *CopyMI) {
+  assert(!CP.isPartial() && "This doesn't work for partial copies.");
+  assert(!CP.isPhys() && "This doesn't work for physreg copies.");
+
+  LiveInterval &IntA =
+    LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+  LiveInterval &IntB =
+    LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+  // the example above.
+  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+  if (BLR == IntB.end()) return false;
+  VNInfo *BValNo = BLR->valno;
+
+  // Get the location that B is defined at. Two options: either this value has
+  // an unknown definition point or it is defined at CopyIdx. If unknown, we
+  // can't process it.
+  if (BValNo->def != CopyIdx) return false;
+
+  // AValNo is the value number in A that defines the copy, A3 in the example.
+  SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
+  LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
+  // The live range might not exist after fun with physreg coalescing.
+ if (ALR == IntA.end()) return false; + VNInfo *AValNo = ALR->valno; + + // If AValNo is defined as a copy from IntB, we can potentially process this. + // Get the instruction that defines this value number. + MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); + // Don't allow any partial copies, even if isCoalescable() allows them. + if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) + return false; + + // Get the LiveRange in IntB that this value number starts with. + LiveInterval::iterator ValLR = + IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); + if (ValLR == IntB.end()) + return false; + + // Make sure that the end of the live range is inside the same block as + // CopyMI. + MachineInstr *ValLREndInst = + LIS->getInstructionFromIndex(ValLR->end.getPrevSlot()); + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + return false; + + // Okay, we now know that ValLR ends in the same block that the CopyMI + // live-range starts. If there are no intervening live ranges between them in + // IntB, we can merge them. + if (ValLR+1 != BLR) return false; + + DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); + + SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; + // We are about to delete CopyMI, so need to remove it as the 'instruction + // that defines this value #'. Update the valnum with the new defining + // instruction #. + BValNo->def = FillerStart; + + // Okay, we can merge them. We need to insert a new liverange: + // [ValLR.end, BLR.begin) of either value number, then we merge the + // two value numbers. + IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); + + // Okay, merge "B1" into the same value number as "B0". + if (BValNo != ValLR->valno) + IntB.MergeValueNumberInto(BValNo, ValLR->valno); + DEBUG(dbgs() << " result = " << IntB << '\n'); + + // If the source instruction was killing the source register before the + // merge, unset the isKill marker given the live range has been extended. + int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); + if (UIdx != -1) { + ValLREndInst->getOperand(UIdx).setIsKill(false); + } + + // Rewrite the copy. If the copy instruction was killing the destination + // register before the merge, find the last use and trim the live range. That + // will also add the isKill marker. + CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); + if (ALR->end == CopyIdx) + LIS->shrinkToUses(&IntA); + + ++numExtends; + return true; +} + +/// hasOtherReachingDefs - Return true if there are definitions of IntB +/// other than BValNo val# that can reach uses of AValno val# of IntA. +bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, + LiveInterval &IntB, + VNInfo *AValNo, + VNInfo *BValNo) { + // If AValNo has PHI kills, conservatively assume that IntB defs can reach + // the PHI values. 
+  if (LIS->hasPHIKill(IntA, AValNo))
+    return true;
+
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    LiveInterval::Ranges::iterator BI =
+      std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+    if (BI != IntB.ranges.begin())
+      --BI;
+    for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+      if (BI->valno == BValNo)
+        continue;
+      if (BI->start <= AI->start && BI->end > AI->start)
+        return true;
+      if (BI->start > AI->start && BI->start < AI->end)
+        return true;
+    }
+  }
+  return false;
+}
+
+/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+///  A3 = op A2 B0<kill>
+///    ...
+///  B1 = A3      <- this copy
+///    ...
+///     = op A3   <- more uses
+///
+/// ==>
+///
+///  B2 = op B0 A2<kill>
+///    ...
+///  B1 = B2      <- now an identity copy
+///    ...
+///     = op B2   <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
+                                                 MachineInstr *CopyMI) {
+  assert (!CP.isPhys());
+
+  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+
+  LiveInterval &IntA =
+    LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+  LiveInterval &IntB =
+    LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+  // the example above.
+  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+  if (!BValNo || BValNo->def != CopyIdx)
+    return false;
+
+  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+  // AValNo is the value number in A that defines the copy, A3 in the example.
+  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
+  assert(AValNo && "COPY source not live");
+  if (AValNo->isPHIDef() || AValNo->isUnused())
+    return false;
+  MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
+  if (!DefMI)
+    return false;
+  if (!DefMI->isCommutable())
+    return false;
+  // If DefMI is a two-address instruction then commuting it will change the
+  // destination register.
+  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+  assert(DefIdx != -1);
+  unsigned UseOpIdx;
+  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+    return false;
+  unsigned Op1, Op2, NewDstIdx;
+  if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
+    return false;
+  if (Op1 == UseOpIdx)
+    NewDstIdx = Op2;
+  else if (Op2 == UseOpIdx)
+    NewDstIdx = Op1;
+  else
+    return false;
+
+  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+  unsigned NewReg = NewDstMO.getReg();
+  if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill())
+    return false;
+
+  // Make sure there are no other definitions of IntB that would reach the
+  // uses which the new definition can reach.
+  if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+    return false;
+
+  // If some of the uses of IntA.reg are already coalesced away, return false.
+  // It's not possible to determine whether it's safe to perform the coalescing.
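+  // For example, if a use of IntA.reg appears in a two-address instruction
+  // whose tied def has already been coalesced into another register,
+  // rewriting only the use operand would violate the tie constraint, so the
+  // scan below conservatively gives up on any tied use.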
+  for (MachineRegisterInfo::use_nodbg_iterator UI =
+         MRI->use_nodbg_begin(IntA.reg),
+       UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
+    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+    if (ULR == IntA.end() || ULR->valno != AValNo)
+      continue;
+    // If this use is tied to a def, we can't rewrite the register.
+    if (UseMI->isRegTiedToDefOperand(UI.getOperandNo()))
+      return false;
+  }
+
+  DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
+               << *DefMI);
+
+  // At this point we have decided that it is legal to do this
+  // transformation. Start by commuting the instruction.
+  MachineBasicBlock *MBB = DefMI->getParent();
+  MachineInstr *NewMI = TII->commuteInstruction(DefMI);
+  if (!NewMI)
+    return false;
+  if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
+      TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+      !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
+    return false;
+  if (NewMI != DefMI) {
+    LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
+    MachineBasicBlock::iterator Pos = DefMI;
+    MBB->insert(Pos, NewMI);
+    MBB->erase(DefMI);
+  }
+  unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+  NewMI->getOperand(OpIdx).setIsKill();
+
+  // If ALR and BLR overlap and end of BLR extends beyond end of ALR, e.g.
+  // A = or A, B
+  // ...
+  // B = A
+  // ...
+  // C = A<kill>
+  // ...
+  //   = B
+
+  // Update uses of IntA of the specific Val# with IntB.
+  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
+       UE = MRI->use_end(); UI != UE;) {
+    MachineOperand &UseMO = UI.getOperand();
+    MachineInstr *UseMI = &*UI;
+    ++UI;
+    if (UseMI->isDebugValue()) {
+      // FIXME These don't have an instruction index. Not clear we have enough
+      // info to decide whether to do this replacement or not. For now do it.
+      UseMO.setReg(NewReg);
+      continue;
+    }
+    SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
+    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+    if (ULR == IntA.end() || ULR->valno != AValNo)
+      continue;
+    // Kill flags are no longer accurate. They are recomputed after RA.
+    UseMO.setIsKill(false);
+    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+      UseMO.substPhysReg(NewReg, *TRI);
+    else
+      UseMO.setReg(NewReg);
+    if (UseMI == CopyMI)
+      continue;
+    if (!UseMI->isCopy())
+      continue;
+    if (UseMI->getOperand(0).getReg() != IntB.reg ||
+        UseMI->getOperand(0).getSubReg())
+      continue;
+
+    // This copy will become a noop. If it's defining a new val#, merge it into
+    // BValNo.
+    SlotIndex DefIdx = UseIdx.getRegSlot();
+    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+    if (!DVNI)
+      continue;
+    DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+    assert(DVNI->def == DefIdx);
+    BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
+    ErasedInstrs.insert(UseMI);
+    LIS->RemoveMachineInstrFromMaps(UseMI);
+    UseMI->eraseFromParent();
+  }
+
+  // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+  // is updated.
+  VNInfo *ValNo = BValNo;
+  ValNo->def = AValNo->def;
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
+  }
+  DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+
+  IntA.removeValNo(AValNo);
+  DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
+  ++numCommutes;
+  return true;
+}
+
+/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerializing the definition.
+bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
+                                                MachineInstr *CopyMI) {
+  unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+  unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+  if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+    return false;
+
+  LiveInterval &SrcInt = LIS->getInterval(SrcReg);
+  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
+  LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+  assert(SrcLR != SrcInt.end() && "Live range not found!");
+  VNInfo *ValNo = SrcLR->valno;
+  if (ValNo->isPHIDef() || ValNo->isUnused())
+    return false;
+  MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
+  if (!DefMI)
+    return false;
+  assert(DefMI && "Defining instruction disappeared");
+  if (!DefMI->isAsCheapAsAMove())
+    return false;
+  if (!TII->isTriviallyReMaterializable(DefMI, AA))
+    return false;
+  bool SawStore = false;
+  if (!DefMI->isSafeToMove(TII, AA, SawStore))
+    return false;
+  const MCInstrDesc &MCID = DefMI->getDesc();
+  if (MCID.getNumDefs() != 1)
+    return false;
+  // Only support subregister destinations when the def is read-undef.
+  MachineOperand &DstOperand = CopyMI->getOperand(0);
+  if (DstOperand.getSubReg() && !DstOperand.isUndef())
+    return false;
+  if (!DefMI->isImplicitDef()) {
+    // Make sure the copy destination register class fits the instruction
+    // definition register class. The mismatch can happen as a result of earlier
+    // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+    const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF);
+    if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+      if (!MRI->constrainRegClass(DstReg, RC))
+        return false;
+    } else if (!RC->contains(DstReg))
+      return false;
+  }
+
+  MachineBasicBlock *MBB = CopyMI->getParent();
+  MachineBasicBlock::iterator MII =
+    llvm::next(MachineBasicBlock::iterator(CopyMI));
+  TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
+  MachineInstr *NewMI = prior(MII);
+
+  // The original DefMI may have been a subregister def, but the full register
+  // class of its destination matches the destination of CopyMI, and CopyMI is
+  // either a full register def or is read-undef. Therefore we can clear the
+  // subregister index on the rematerialized instruction.
+  NewMI->getOperand(0).setSubReg(0);
+
+  // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
+  // We need to remember these so we can add intervals once we insert
+  // NewMI into SlotIndexes.
+  SmallVector<unsigned, 4> NewMIImplDefs;
+  for (unsigned i = NewMI->getDesc().getNumOperands(),
+         e = NewMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = NewMI->getOperand(i);
+    if (MO.isReg()) {
+      assert(MO.isDef() && MO.isImplicit() && MO.isDead() &&
+             TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
+      NewMIImplDefs.push_back(MO.getReg());
+    }
+  }
+
+  // CopyMI may have implicit operands, transfer them over to the newly
+  // rematerialized instruction, and update implicit def interval valnos.
+  for (unsigned i = CopyMI->getDesc().getNumOperands(),
+         e = CopyMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = CopyMI->getOperand(i);
+    if (MO.isReg()) {
+      assert(MO.isImplicit() && "No explicit operands after implicit operands.");
+      // Discard VReg implicit defs.
+      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+        NewMI->addOperand(MO);
+      }
+    }
+  }
+
+  LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+
+  SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+  for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
+    unsigned Reg = NewMIImplDefs[i];
+    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+      if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
+        LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
+  }
+
+  CopyMI->eraseFromParent();
+  ErasedInstrs.insert(CopyMI);
+  DEBUG(dbgs() << "Remat: " << *NewMI);
+  ++NumReMats;
+
+  // The source interval can become smaller because we removed a use.
+  LIS->shrinkToUses(&SrcInt, &DeadDefs);
+  if (!DeadDefs.empty())
+    eliminateDeadDefs();
+
+  return true;
+}
+
+/// eliminateUndefCopy - ProcessImplicitDefs may leave some copies of <undef>
+/// values; it only removes local variables. When we have a copy like:
+///
+///   %vreg1 = COPY %vreg2<undef>
+///
+/// We delete the copy and remove the corresponding value number from %vreg1.
+/// Any uses of that value number are marked as <undef>.
+bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
+                                           const CoalescerPair &CP) {
+  SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
+  LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg());
+  if (SrcInt->liveAt(Idx))
+    return false;
+  LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg());
+  if (DstInt->liveAt(Idx))
+    return false;
+
+  // No intervals are live-in to CopyMI - it is undef.
+  if (CP.isFlipped())
+    DstInt = SrcInt;
+  SrcInt = 0;
+
+  VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
+  assert(DeadVNI && "No value defined in DstInt");
+  DstInt->removeValNo(DeadVNI);
+
+  // Find new undef uses.
+  for (MachineRegisterInfo::reg_nodbg_iterator
+         I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end();
+       I != E; ++I) {
+    MachineOperand &MO = I.getOperand();
+    if (MO.isDef() || MO.isUndef())
+      continue;
+    MachineInstr *MI = MO.getParent();
+    SlotIndex Idx = LIS->getInstructionIndex(MI);
+    if (DstInt->liveAt(Idx))
+      continue;
+    MO.setIsUndef(true);
+    DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI);
+  }
+  return true;
+}
+
+/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
+                                          unsigned DstReg,
+                                          unsigned SubIdx) {
+  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+  LiveInterval *DstInt = DstIsPhys ?
0 : &LIS->getInterval(DstReg); + + SmallPtrSet<MachineInstr*, 8> Visited; + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); + MachineInstr *UseMI = I.skipInstruction();) { + // Each instruction can only be rewritten once because sub-register + // composition is not always idempotent. When SrcReg != DstReg, rewriting + // the UseMI operands removes them from the SrcReg use-def chain, but when + // SrcReg is DstReg we could encounter UseMI twice if it has multiple + // operands mentioning the virtual register. + if (SrcReg == DstReg && !Visited.insert(UseMI)) + continue; + + SmallVector<unsigned,8> Ops; + bool Reads, Writes; + tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + + // If SrcReg wasn't read, it may still be the case that DstReg is live-in + // because SrcReg is a sub-register. + if (DstInt && !Reads && SubIdx) + Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI)); + + // Replace SrcReg with DstReg in all UseMI operands. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = UseMI->getOperand(Ops[i]); + + // Adjust <undef> flags in case of sub-register joins. We don't want to + // turn a full def into a read-modify-write sub-register def and vice + // versa. + if (SubIdx && MO.isDef()) + MO.setIsUndef(!Reads); + + if (DstIsPhys) + MO.substPhysReg(DstReg, *TRI); + else + MO.substVirtReg(DstReg, SubIdx, *TRI); + } + + DEBUG({ + dbgs() << "\t\tupdated: "; + if (!UseMI->isDebugValue()) + dbgs() << LIS->getInstructionIndex(UseMI) << "\t"; + dbgs() << *UseMI; + }); + } +} + +/// canJoinPhys - Return true if a copy involving a physreg should be joined. +bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { + /// Always join simple intervals that are defined by a single copy from a + /// reserved register. This doesn't increase register pressure, so it is + /// always beneficial. + if (!MRI->isReserved(CP.getDstReg())) { + DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); + return false; + } + + LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); + if (CP.isFlipped() && JoinVInt.containsOneValue()) + return true; + + DEBUG(dbgs() << "\tCannot join defs into reserved register.\n"); + return false; +} + +/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, +/// which are the src/dst of the copy instruction CopyMI. This returns true +/// if the copy was successfully coalesced away. If it is not currently +/// possible to coalesce this interval, but it may be possible if other +/// things get coalesced, then it returns true by reference in 'Again'. +bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { + + Again = false; + DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI); + + CoalescerPair CP(*TRI); + if (!CP.setRegisters(CopyMI)) { + DEBUG(dbgs() << "\tNot coalescable.\n"); + return false; + } + + // Dead code elimination. This really should be handled by MachineDCE, but + // sometimes dead copies slip through, and we can't generate invalid live + // ranges. + if (!CP.isPhys() && CopyMI->allDefsAreDead()) { + DEBUG(dbgs() << "\tCopy is dead.\n"); + DeadDefs.push_back(CopyMI); + eliminateDeadDefs(); + return true; + } + + // Eliminate undefs. + if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) { + DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n"); + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + return false; // Not coalescable. 
+ } + + // Coalesced copies are normally removed immediately, but transformations + // like removeCopyByCommutingDef() can inadvertently create identity copies. + // When that happens, just join the values and remove the copy. + if (CP.getSrcReg() == CP.getDstReg()) { + LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); + DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); + LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI)); + if (VNInfo *DefVNI = LRQ.valueDefined()) { + VNInfo *ReadVNI = LRQ.valueIn(); + assert(ReadVNI && "No value before copy and no <undef> flag."); + assert(ReadVNI != DefVNI && "Cannot read and define the same value."); + LI.MergeValueNumberInto(DefVNI, ReadVNI); + DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); + } + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + return true; + } + + // Enforce policies. + if (CP.isPhys()) { + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) + << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) + << '\n'); + if (!canJoinPhys(CP)) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (reMaterializeTrivialDef(CP, CopyMI)) + return true; + return false; + } + } else { + DEBUG({ + dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName() + << " with "; + if (CP.getDstIdx() && CP.getSrcIdx()) + dbgs() << PrintReg(CP.getDstReg()) << " in " + << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " + << PrintReg(CP.getSrcReg()) << " in " + << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n'; + else + dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in " + << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; + }); + + // When possible, let DstReg be the larger interval. + if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() > + LIS->getInterval(CP.getDstReg()).ranges.size()) + CP.flip(); + } + + // Okay, attempt to join these two intervals. On failure, this returns false. + // Otherwise, if one of the intervals being joined is a physreg, this method + // always canonicalizes DstInt to be it. The output "SrcInt" will not have + // been modified, so we can use this information below to update aliases. + if (!joinIntervals(CP)) { + // Coalescing failed. + + // If definition of source is defined by trivial computation, try + // rematerializing it. + if (reMaterializeTrivialDef(CP, CopyMI)) + return true; + + // If we can eliminate the copy without merging the live ranges, do so now. + if (!CP.isPartial() && !CP.isPhys()) { + if (adjustCopiesBackFrom(CP, CopyMI) || + removeCopyByCommutingDef(CP, CopyMI)) { + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + DEBUG(dbgs() << "\tTrivial!\n"); + return true; + } + } + + // Otherwise, we are unable to join the intervals. + DEBUG(dbgs() << "\tInterference!\n"); + Again = true; // May be possible to coalesce later. + return false; + } + + // Coalescing to a virtual register that is of a sub-register class of the + // other. Make sure the resulting register is set to the right register class. + if (CP.isCrossClass()) { + ++numCrossRCs; + MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); + } + + // Removing sub-register copies can ease the register class constraints. + // Make sure we attempt to inflate the register class of DstReg. + if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC())) + InflateRegs.push_back(CP.getDstReg()); + + // CopyMI has been erased by joinIntervals at this point. 
Remove it from
+ // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
+ // to the work list. This keeps ErasedInstrs from growing needlessly.
+ ErasedInstrs.erase(CopyMI);
+
+ // Rewrite all SrcReg operands to DstReg.
+ // Also update DstReg operands to include DstIdx if it is set.
+ if (CP.getDstIdx())
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+
+ // SrcReg is guaranteed to be the register whose live interval is being
+ // merged.
+ LIS->removeInterval(CP.getSrcReg());
+
+ // Update regalloc hint.
+ TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
+
+ DEBUG({
+ dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI);
+ if (!CP.isPhys())
+ dbgs() << LIS->getInterval(CP.getDstReg());
+ dbgs() << '\n';
+ });
+
+ ++numJoins;
+ return true;
+}
+
+/// Attempt joining with a reserved physreg.
+bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
+ assert(CP.isPhys() && "Must be a physreg copy");
+ assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << '\n');
+
+ assert(CP.isFlipped() && RHS.containsOneValue() &&
+ "Invalid join with reserved register");
+
+ // Optimization for reserved registers like ESP. We can only merge with a
+ // reserved physreg if RHS has a single value that is a copy of
+ // CP.getDstReg(). The live range of the reserved register will look like a
+ // set of dead defs - we don't properly track the live range of reserved
+ // registers.
+
+ // Deny any overlapping intervals. This relies on all the reserved
+ // register live ranges looking like dead defs.
+ for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI)
+ if (RHS.overlaps(LIS->getRegUnit(*UI))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
+ return false;
+ }
+
+ // Skip any value computations; we are not adding new values to the
+ // reserved register. Also skip merging the live ranges; the reserved
+ // register live range doesn't need to be accurate as long as all the
+ // defs are there.
+
+ // Delete the identity copy.
+ MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+
+ // We don't track kills for reserved registers.
+ MRI->clearKillFlags(CP.getSrcReg());
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Interference checking and interval joining
+//===----------------------------------------------------------------------===//
+//
+// In the easiest case, the two live ranges being joined are disjoint, and
+// there is no interference to consider. It is quite common, though, to have
+// overlapping live ranges, and we need to check if the interference can be
+// resolved.
+//
+// The live range of a single SSA value forms a sub-tree of the dominator tree.
+// This means that two SSA values overlap if and only if the def of one value
+// is contained in the live range of the other value. As a special case, the
+// overlapping values can be defined at the same index.
+//
+// The interference from an overlapping def can be resolved in these cases:
+//
+// 1. Coalescable copies. The value is defined by a copy that would become an
+// identity copy after joining SrcReg and DstReg.
The copy instruction will
+// be removed, and the value will be merged with the source value.
+//
+// There can be several copies back and forth, causing many values to be
+// merged into one. We compute a list of ultimate values in the joined live
+// range as well as a mapping from the old value numbers.
+//
+// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
+// predecessors have a live out value. It doesn't cause real interference,
+// and can be merged into the value it overlaps. Like a coalescable copy, it
+// can be erased after joining.
+//
+// 3. Copy of external value. The overlapping def may be a copy of a value that
+// is already in the other register. This is like a coalescable copy, but
+// the live range of the source register must be trimmed after erasing the
+// copy instruction:
+//
+// %src = COPY %ext
+// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
+//
+// 4. Clobbering undefined lanes. Vector registers are sometimes built by
+// defining one lane at a time:
+//
+// %dst:ssub0<def,read-undef> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// The live range of %src overlaps the %dst value defined by FOO, but
+// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
+// which was undef anyway.
+//
+// The value mapping is more complicated in this case. The final live range
+// will have different value numbers for both FOO and BAR, but there is no
+// simple mapping from old to new values. It may even be necessary to add
+// new PHI values.
+//
+// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
+// is live, but never read. This can happen because we don't compute
+// individual live ranges per lane.
+//
+// %dst<def> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// This kind of interference is only resolved locally. If the clobbered
+// lane value escapes the block, the join is aborted.
+
+namespace {
+/// Track information about values in a single virtual register about to be
+/// joined. Objects of this class are always created in pairs - one for each
+/// side of the CoalescerPair.
+class JoinVals {
+ LiveInterval &LI;
+
+ // Location of this register in the final joined register.
+ // Either CP.DstIdx or CP.SrcIdx.
+ unsigned SubIdx;
+
+ // Values that will be present in the final live range.
+ SmallVectorImpl<VNInfo*> &NewVNInfo;
+
+ const CoalescerPair &CP;
+ LiveIntervals *LIS;
+ SlotIndexes *Indexes;
+ const TargetRegisterInfo *TRI;
+
+ // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
+ // This is suitable for passing to LiveInterval::join().
+ SmallVector<int, 8> Assignments;
+
+ // Conflict resolution for overlapping values.
+ enum ConflictResolution {
+ // No overlap, simply keep this value.
+ CR_Keep,
+
+ // Merge this value into OtherVNI and erase the defining instruction.
+ // Used for IMPLICIT_DEF, coalescable copies, and copies from external
+ // values.
+ CR_Erase,
+
+ // Merge this value into OtherVNI but keep the defining instruction.
+ // This is for the special case where OtherVNI is defined by the same
+ // instruction.
+ CR_Merge,
+
+ // Keep this value, and have it replace OtherVNI where possible. This
+ // complicates value mapping since OtherVNI maps to two different values
+ // before and after this def.
+ // Used when clobbering undefined or dead lanes.
+ CR_Replace,
+
+ // Unresolved conflict. Visit later when all values have been mapped.
+ CR_Unresolved,
+
+ // Unresolvable conflict. Abort the join.
+ CR_Impossible + }; + + // Per-value info for LI. The lane bit masks are all relative to the final + // joined register, so they can be compared directly between SrcReg and + // DstReg. + struct Val { + ConflictResolution Resolution; + + // Lanes written by this def, 0 for unanalyzed values. + unsigned WriteLanes; + + // Lanes with defined values in this register. Other lanes are undef and + // safe to clobber. + unsigned ValidLanes; + + // Value in LI being redefined by this def. + VNInfo *RedefVNI; + + // Value in the other live range that overlaps this def, if any. + VNInfo *OtherVNI; + + // Is this value an IMPLICIT_DEF that can be erased? + // + // IMPLICIT_DEF values should only exist at the end of a basic block that + // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be + // safely erased if they are overlapping a live value in the other live + // interval. + // + // Weird control flow graphs and incomplete PHI handling in + // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with + // longer live ranges. Such IMPLICIT_DEF values should be treated like + // normal values. + bool ErasableImplicitDef; + + // True when the live range of this value will be pruned because of an + // overlapping CR_Replace value in the other live range. + bool Pruned; + + // True once Pruned above has been computed. + bool PrunedComputed; + + Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), + RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false), + Pruned(false), PrunedComputed(false) {} + + bool isAnalyzed() const { return WriteLanes != 0; } + }; + + // One entry per value number in LI. + SmallVector<Val, 8> Vals; + + unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef); + VNInfo *stripCopies(VNInfo *VNI); + ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); + void computeAssignment(unsigned ValNo, JoinVals &Other); + bool taintExtent(unsigned, unsigned, JoinVals&, + SmallVectorImpl<std::pair<SlotIndex, unsigned> >&); + bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned); + bool isPrunedValue(unsigned ValNo, JoinVals &Other); + +public: + JoinVals(LiveInterval &li, unsigned subIdx, + SmallVectorImpl<VNInfo*> &newVNInfo, + const CoalescerPair &cp, + LiveIntervals *lis, + const TargetRegisterInfo *tri) + : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis), + Indexes(LIS->getSlotIndexes()), TRI(tri), + Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums()) + {} + + /// Analyze defs in LI and compute a value mapping in NewVNInfo. + /// Returns false if any conflicts were impossible to resolve. + bool mapValues(JoinVals &Other); + + /// Try to resolve conflicts that require all values to be mapped. + /// Returns false if any conflicts were impossible to resolve. + bool resolveConflicts(JoinVals &Other); + + /// Prune the live range of values in Other.LI where they would conflict with + /// CR_Replace values in LI. Collect end points for restoring the live range + /// after joining. + void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints); + + /// Erase any machine instructions that have been coalesced away. + /// Add erased instructions to ErasedInstrs. + /// Add foreign virtual registers to ShrinkRegs if their live range ended at + /// the erased instrs. + void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, + SmallVectorImpl<unsigned> &ShrinkRegs); + + /// Get the value assignments suitable for passing to LiveInterval::join. 
+ const int *getAssignments() const { return Assignments.data(); } +}; +} // end anonymous namespace + +/// Compute the bitmask of lanes actually written by DefMI. +/// Set Redef if there are any partial register definitions that depend on the +/// previous value of the register. +unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) { + unsigned L = 0; + for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) { + if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef()) + continue; + L |= TRI->getSubRegIndexLaneMask( + TRI->composeSubRegIndices(SubIdx, MO->getSubReg())); + if (MO->readsReg()) + Redef = true; + } + return L; +} + +/// Find the ultimate value that VNI was copied from. +VNInfo *JoinVals::stripCopies(VNInfo *VNI) { + while (!VNI->isPHIDef()) { + MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def); + assert(MI && "No defining instruction"); + if (!MI->isFullCopy()) + break; + unsigned Reg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + break; + LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def); + if (!LRQ.valueIn()) + break; + VNI = LRQ.valueIn(); + } + return VNI; +} + +/// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. +/// Return a conflict resolution when possible, but leave the hard cases as +/// CR_Unresolved. +/// Recursively calls computeAssignment() on this and Other, guaranteeing that +/// both OtherVNI and RedefVNI have been analyzed and mapped before returning. +/// The recursion always goes upwards in the dominator tree, making loops +/// impossible. +JoinVals::ConflictResolution +JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + assert(!V.isAnalyzed() && "Value has already been analyzed!"); + VNInfo *VNI = LI.getValNumInfo(ValNo); + if (VNI->isUnused()) { + V.WriteLanes = ~0u; + return CR_Keep; + } + + // Get the instruction defining this value, compute the lanes written. + const MachineInstr *DefMI = 0; + if (VNI->isPHIDef()) { + // Conservatively assume that all lanes in a PHI are valid. + V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx); + } else { + DefMI = Indexes->getInstructionFromIndex(VNI->def); + bool Redef = false; + V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); + + // If this is a read-modify-write instruction, there may be more valid + // lanes than the ones written by this instruction. + // This only covers partial redef operands. DefMI may have normal use + // operands reading the register. They don't contribute valid lanes. + // + // This adds ssub1 to the set of valid lanes in %src: + // + // %src:ssub1<def> = FOO + // + // This leaves only ssub1 valid, making any other lanes undef: + // + // %src:ssub1<def,read-undef> = FOO %src:ssub2 + // + // The <read-undef> flag on the def operand means that old lane values are + // not important. + if (Redef) { + V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn(); + assert(V.RedefVNI && "Instruction is reading nonexistent value"); + computeAssignment(V.RedefVNI->id, Other); + V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; + } + + // An IMPLICIT_DEF writes undef values. + if (DefMI->isImplicitDef()) { + // We normally expect IMPLICIT_DEF values to be live only until the end + // of their block. If the value is really live longer and gets pruned in + // another block, this flag is cleared again. + V.ErasableImplicitDef = true; + V.ValidLanes &= ~V.WriteLanes; + } + } + + // Find the value in Other that overlaps VNI->def, if any. 
+ LiveRangeQuery OtherLRQ(Other.LI, VNI->def);
+
+ // It is possible that both values are defined by the same instruction, or
+ // the values are PHIs defined in the same block. When that happens, the two
+ // values should be merged into one, but not into any preceding value.
+ // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
+ if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
+ assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
+
+ // One value stays, the other is merged. Keep the earlier one, or the first
+ // one we see.
+ if (OtherVNI->def < VNI->def)
+ Other.computeAssignment(OtherVNI->id, *this);
+ else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
+ // This is an early-clobber def overlapping a live-in value in the other
+ // register. Not mergeable.
+ V.OtherVNI = OtherLRQ.valueIn();
+ return CR_Impossible;
+ }
+ V.OtherVNI = OtherVNI;
+ Val &OtherV = Other.Vals[OtherVNI->id];
+ // Keep this value, check for conflicts when analyzing OtherVNI.
+ if (!OtherV.isAnalyzed())
+ return CR_Keep;
+ // Both sides have been analyzed now.
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Merge;
+ if (V.ValidLanes & OtherV.ValidLanes)
+ // Overlapping lanes can't be resolved.
+ return CR_Impossible;
+ else
+ return CR_Merge;
+ }
+
+ // No simultaneous def. Is Other live at the def?
+ V.OtherVNI = OtherLRQ.valueIn();
+ if (!V.OtherVNI)
+ // No overlap, no conflict.
+ return CR_Keep;
+
+ assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
+
+ // We have overlapping values, or possibly a kill of Other.
+ // Recursively compute assignments up the dominator tree.
+ Other.computeAssignment(V.OtherVNI->id, *this);
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+ // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ if (OtherV.ErasableImplicitDef && DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into BB#" << DefMI->getParent()->getNumber()
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ }
+
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Replace;
+
+ // Check for simple erasable conflicts.
+ if (DefMI->isImplicitDef())
+ return CR_Erase;
+
+ // Include the non-conflict where DefMI is a coalescable copy that kills
+ // OtherVNI. We still want the copy erased and value numbers merged.
+ if (CP.isCoalescable(DefMI)) {
+ // Some of the lanes copied from OtherVNI may be undef, making them undef
+ // here too.
+ V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
+ return CR_Erase;
+ }
+
+ // This may not be a real conflict if DefMI simply kills Other and defines
+ // VNI.
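+ //
+ // For illustration (MIR-style sketch, names invented):
+ //
+ // %other = ...
+ // %this = ADD %other<kill>, ...
+ //
+ // Other's live range ends exactly at this def, so none of Other's lanes
+ // are clobbered and this value can simply be kept.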
+ if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def) + return CR_Keep; + + // Handle the case where VNI and OtherVNI can be proven to be identical: + // + // %other = COPY %ext + // %this = COPY %ext <-- Erase this copy + // + if (DefMI->isFullCopy() && !CP.isPartial() && + stripCopies(VNI) == stripCopies(V.OtherVNI)) + return CR_Erase; + + // If the lanes written by this instruction were all undef in OtherVNI, it is + // still safe to join the live ranges. This can't be done with a simple value + // mapping, though - OtherVNI will map to multiple values: + // + // 1 %dst:ssub0 = FOO <-- OtherVNI + // 2 %src = BAR <-- VNI + // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy. + // 4 BAZ %dst<kill> + // 5 QUUX %src<kill> + // + // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace + // handles this complex value mapping. + if ((V.WriteLanes & OtherV.ValidLanes) == 0) + return CR_Replace; + + // If the other live range is killed by DefMI and the live ranges are still + // overlapping, it must be because we're looking at an early clobber def: + // + // %dst<def,early-clobber> = ASM %src<kill> + // + // In this case, it is illegal to merge the two live ranges since the early + // clobber def would clobber %src before it was read. + if (OtherLRQ.isKill()) { + // This case where the def doesn't overlap the kill is handled above. + assert(VNI->def.isEarlyClobber() && + "Only early clobber defs can overlap a kill"); + return CR_Impossible; + } + + // VNI is clobbering live lanes in OtherVNI, but there is still the + // possibility that no instructions actually read the clobbered lanes. + // If we're clobbering all the lanes in OtherVNI, at least one must be read. + // Otherwise Other.LI wouldn't be live here. + if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) + return CR_Impossible; + + // We need to verify that no instructions are reading the clobbered lanes. To + // save compile time, we'll only check that locally. Don't allow the tainted + // value to escape the basic block. + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) + return CR_Impossible; + + // There are still some things that could go wrong besides clobbered lanes + // being read, for example OtherVNI may be only partially redefined in MBB, + // and some clobbered lanes could escape the block. Save this analysis for + // resolveConflicts() when all values have been mapped. We need to know + // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute + // that now - the recursive analyzeValue() calls must go upwards in the + // dominator tree. + return CR_Unresolved; +} + +/// Compute the value assignment for ValNo in LI. +/// This may be called recursively by analyzeValue(), but never for a ValNo on +/// the stack. +void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + if (V.isAnalyzed()) { + // Recursion should always move up the dominator tree, so ValNo is not + // supposed to reappear before it has been assigned. + assert(Assignments[ValNo] != -1 && "Bad recursion?"); + return; + } + switch ((V.Resolution = analyzeValue(ValNo, Other))) { + case CR_Erase: + case CR_Merge: + // Merge this ValNo into OtherVNI. 
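+ // For illustration (slot numbers invented): if OtherVNI was assigned
+ // slot 2 in NewVNInfo, this ValNo is assigned slot 2 as well, so both
+ // old value numbers name a single value in the joined live range.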
+ assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); + assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); + Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; + DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@' + << LI.getValNumInfo(ValNo)->def << " into " + << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@' + << V.OtherVNI->def << " --> @" + << NewVNInfo[Assignments[ValNo]]->def << '\n'); + break; + case CR_Replace: + case CR_Unresolved: + // The other value is going to be pruned if this join is successful. + assert(V.OtherVNI && "OtherVNI not assigned, can't prune"); + Other.Vals[V.OtherVNI->id].Pruned = true; + // Fall through. + default: + // This value number needs to go in the final joined live range. + Assignments[ValNo] = NewVNInfo.size(); + NewVNInfo.push_back(LI.getValNumInfo(ValNo)); + break; + } +} + +bool JoinVals::mapValues(JoinVals &Other) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + computeAssignment(i, Other); + if (Vals[i].Resolution == CR_Impossible) { + DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i + << '@' << LI.getValNumInfo(i)->def << '\n'); + return false; + } + } + return true; +} + +/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute +/// the extent of the tainted lanes in the block. +/// +/// Multiple values in Other.LI can be affected since partial redefinitions can +/// preserve previously tainted lanes. +/// +/// 1 %dst = VLOAD <-- Define all lanes in %dst +/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 +/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 +/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read +/// +/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) +/// entry to TaintedVals. +/// +/// Returns false if the tainted lanes extend beyond the basic block. +bool JoinVals:: +taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, + SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) { + VNInfo *VNI = LI.getValNumInfo(ValNo); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); + + // Scan Other.LI from VNI.def to MBBEnd. + LiveInterval::iterator OtherI = Other.LI.find(VNI->def); + assert(OtherI != Other.LI.end() && "No conflict?"); + do { + // OtherI is pointing to a tainted value. Abort the join if the tainted + // lanes escape the block. + SlotIndex End = OtherI->end; + if (End >= MBBEnd) { + DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':' + << OtherI->valno->id << '@' << OtherI->start << '\n'); + return false; + } + DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':' + << OtherI->valno->id << '@' << OtherI->start + << " to " << End << '\n'); + // A dead def is not a problem. + if (End.isDead()) + break; + TaintExtent.push_back(std::make_pair(End, TaintedLanes)); + + // Check for another def in the MBB. + if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd) + break; + + // Lanes written by the new def are no longer tainted. + const Val &OV = Other.Vals[OtherI->valno->id]; + TaintedLanes &= ~OV.WriteLanes; + if (!OV.RedefVNI) + break; + } while (TaintedLanes); + return true; +} + +/// Return true if MI uses any of the given Lanes from Reg. +/// This does not include partial redefinitions of Reg. 
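+/// For example (lane masks invented): if ssub0 has lane mask 0x1 and ssub1
+/// has 0x2, a read of Reg:ssub1 checked against Lanes = 0x3 intersects and
+/// returns true, while the same read checked against Lanes = 0x1 does not.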
+bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, + unsigned Lanes) { + if (MI->isDebugValue()) + return false; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg) + continue; + if (!MO->readsReg()) + continue; + if (Lanes & TRI->getSubRegIndexLaneMask( + TRI->composeSubRegIndices(SubIdx, MO->getSubReg()))) + return true; + } + return false; +} + +bool JoinVals::resolveConflicts(JoinVals &Other) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; + assert (V.Resolution != CR_Impossible && "Unresolvable conflict"); + if (V.Resolution != CR_Unresolved) + continue; + DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i + << '@' << LI.getValNumInfo(i)->def << '\n'); + ++NumLaneConflicts; + assert(V.OtherVNI && "Inconsistent conflict resolution."); + VNInfo *VNI = LI.getValNumInfo(i); + const Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the + // join, those lanes will be tainted with a wrong value. Get the extent of + // the tainted lanes. + unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes; + SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent; + if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) + // Tainted lanes would extend beyond the basic block. + return false; + + assert(!TaintExtent.empty() && "There should be at least one conflict."); + + // Now look at the instructions from VNI->def to TaintExtent (inclusive). + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + MachineBasicBlock::iterator MI = MBB->begin(); + if (!VNI->isPHIDef()) { + MI = Indexes->getInstructionFromIndex(VNI->def); + // No need to check the instruction defining VNI for reads. + ++MI; + } + assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) && + "Interference ends on VNI->def. Should have been handled earlier"); + MachineInstr *LastMI = + Indexes->getInstructionFromIndex(TaintExtent.front().first); + assert(LastMI && "Range must end at a proper instruction"); + unsigned TaintNum = 0; + for(;;) { + assert(MI != MBB->end() && "Bad LastMI"); + if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) { + DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); + return false; + } + // LastMI is the last instruction to use the current value. + if (&*MI == LastMI) { + if (++TaintNum == TaintExtent.size()) + break; + LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first); + assert(LastMI && "Range must end at a proper instruction"); + TaintedLanes = TaintExtent[TaintNum].second; + } + ++MI; + } + + // The tainted lanes are unused. + V.Resolution = CR_Replace; + ++NumLaneResolves; + } + return true; +} + +// Determine if ValNo is a copy of a value number in LI or Other.LI that will +// be pruned: +// +// %dst = COPY %src +// %src = COPY %dst <-- This value to be pruned. +// %dst = COPY %src <-- This value is a copy of a pruned value. +// +bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + if (V.Pruned || V.PrunedComputed) + return V.Pruned; + + if (V.Resolution != CR_Erase && V.Resolution != CR_Merge) + return V.Pruned; + + // Follow copies up the dominator tree and check if any intermediate value + // has been pruned. 
+ V.PrunedComputed = true; + V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this); + return V.Pruned; +} + +void JoinVals::pruneValues(JoinVals &Other, + SmallVectorImpl<SlotIndex> &EndPoints) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + SlotIndex Def = LI.getValNumInfo(i)->def; + switch (Vals[i].Resolution) { + case CR_Keep: + break; + case CR_Replace: { + // This value takes precedence over the value in Other.LI. + LIS->pruneValue(&Other.LI, Def, &EndPoints); + // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF + // instructions are only inserted to provide a live-out value for PHI + // predecessors, so the instruction should simply go away once its value + // has been replaced. + Val &OtherV = Other.Vals[Vals[i].OtherVNI->id]; + bool EraseImpDef = OtherV.ErasableImplicitDef && + OtherV.Resolution == CR_Keep; + if (!Def.isBlock()) { + // Remove <def,read-undef> flags. This def is now a partial redef. + // Also remove <def,dead> flags since the joined live range will + // continue past this instruction. + for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); + MO.isValid(); ++MO) + if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) { + MO->setIsUndef(EraseImpDef); + MO->setIsDead(false); + } + // This value will reach instructions below, but we need to make sure + // the live range also reaches the instruction at Def. + if (!EraseImpDef) + EndPoints.push_back(Def); + } + DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def + << ": " << Other.LI << '\n'); + break; + } + case CR_Erase: + case CR_Merge: + if (isPrunedValue(i, Other)) { + // This value is ultimately a copy of a pruned value in LI or Other.LI. + // We can no longer trust the value mapping computed by + // computeAssignment(), the value that was originally copied could have + // been replaced. + LIS->pruneValue(&LI, Def, &EndPoints); + DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at " + << Def << ": " << LI << '\n'); + } + break; + case CR_Unresolved: + case CR_Impossible: + llvm_unreachable("Unresolved conflicts"); + } + } +} + +void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, + SmallVectorImpl<unsigned> &ShrinkRegs) { + for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + // Get the def location before markUnused() below invalidates it. + SlotIndex Def = LI.getValNumInfo(i)->def; + switch (Vals[i].Resolution) { + case CR_Keep: + // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any + // longer. The IMPLICIT_DEF instructions are only inserted by + // PHIElimination to guarantee that all PHI predecessors have a value. + if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) + break; + // Remove value number i from LI. Note that this VNInfo is still present + // in NewVNInfo, so it will appear as an unused value number in the final + // joined interval. + LI.getValNumInfo(i)->markUnused(); + LI.removeValNo(LI.getValNumInfo(i)); + DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n'); + // FALL THROUGH. 
+ + case CR_Erase: { + MachineInstr *MI = Indexes->getInstructionFromIndex(Def); + assert(MI && "No instruction to erase"); + if (MI->isCopy()) { + unsigned Reg = MI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg) && + Reg != CP.getSrcReg() && Reg != CP.getDstReg()) + ShrinkRegs.push_back(Reg); + } + ErasedInstrs.insert(MI); + DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + break; + } + default: + break; + } + } +} + +bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { + SmallVector<VNInfo*, 16> NewVNInfo; + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); + JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI); + JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI); + + DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS + << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS + << '\n'); + + // First compute NewVNInfo and the simple value mappings. + // Detect impossible conflicts early. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) + return false; + + // Some conflicts can only be resolved after all values have been mapped. + if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) + return false; + + // All clear, the live ranges can be merged. + + // The merging algorithm in LiveInterval::join() can't handle conflicting + // value mappings, so we need to remove any live ranges that overlap a + // CR_Replace resolution. Collect a set of end points that can be used to + // restore the live range after joining. + SmallVector<SlotIndex, 8> EndPoints; + LHSVals.pruneValues(RHSVals, EndPoints); + RHSVals.pruneValues(LHSVals, EndPoints); + + // Erase COPY and IMPLICIT_DEF instructions. This may cause some external + // registers to require trimming. + SmallVector<unsigned, 8> ShrinkRegs; + LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); + RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); + while (!ShrinkRegs.empty()) + LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); + + // Join RHS into LHS. + LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo, + MRI); + + // Kill flags are going to be wrong if the live ranges were overlapping. + // Eventually, we should simply clear all kill flags when computing live + // ranges. They are reinserted after register allocation. + MRI->clearKillFlags(LHS.reg); + MRI->clearKillFlags(RHS.reg); + + if (EndPoints.empty()) + return true; + + // Recompute the parts of the live range we had to remove because of + // CR_Replace conflicts. + DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() + << " points: " << LHS << '\n'); + LIS->extendToIndices(&LHS, EndPoints); + return true; +} + +/// joinIntervals - Attempt to join these two intervals. On failure, this +/// returns false. +bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { + return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); +} + +namespace { +// Information concerning MBB coalescing priority. +struct MBBPriorityInfo { + MachineBasicBlock *MBB; + unsigned Depth; + bool IsSplit; + + MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit) + : MBB(mbb), Depth(depth), IsSplit(issplit) {} +}; +} + +// C-style comparator that sorts first based on the loop depth of the basic +// block (the unsigned), and then on the MBB number. 
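+//
+// For example (depths and numbers invented): under the qsort below, a block
+// at loop depth 2 sorts before one at depth 1; at equal depth a split
+// critical edge comes first, then the block with more CFG predecessors and
+// successors, and the lower block number breaks any remaining tie.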
+//
+// EnableGlobalCopies assumes that the primary sort key is loop depth.
+static int compareMBBPriority(const void *L, const void *R) {
+ const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
+ const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+ // Deeper loops first
+ if (LHS->Depth != RHS->Depth)
+ return LHS->Depth > RHS->Depth ? -1 : 1;
+
+ // Try to unsplit critical edges next.
+ if (LHS->IsSplit != RHS->IsSplit)
+ return LHS->IsSplit ? -1 : 1;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
+ unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
+ if (cl != cr)
+ return cl > cr ? -1 : 1;
+
+ // As a last resort, sort by block number.
+ return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
+}
+
+/// \returns true if the given copy uses or defines a local live range.
+static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
+ if (!Copy->isCopy())
+ return false;
+
+ unsigned SrcReg = Copy->getOperand(1).getReg();
+ unsigned DstReg = Copy->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
+ || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return false;
+
+ return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
+ || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
+}
+
+// Try joining the copies in CurrList.
+// Null out any successful joins.
+bool RegisterCoalescer::
+copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
+ bool Progress = false;
+ for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
+ if (!CurrList[i])
+ continue;
+ // Skip instruction pointers that have already been erased, for example by
+ // dead code elimination.
+ if (ErasedInstrs.erase(CurrList[i])) {
+ CurrList[i] = 0;
+ continue;
+ }
+ bool Again = false;
+ bool Success = joinCopy(CurrList[i], Again);
+ Progress |= Success;
+ if (Success || !Again)
+ CurrList[i] = 0;
+ }
+ return Progress;
+}
+
+void
+RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ // Collect all copy-like instructions in MBB. Don't start coalescing anything
+ // yet, it might invalidate the iterator.
+ const unsigned PrevSize = WorkList.size();
+ if (JoinGlobalCopies) {
+ // Coalesce copies bottom-up to coalesce local defs before local uses. They
+ // are not inherently easier to resolve, but slightly preferable until we
+ // have local live range splitting. In particular this is required by
+ // cmp+jmp macro fusion.
+ for (MachineBasicBlock::reverse_iterator
+ MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) {
+ if (!MII->isCopyLike())
+ continue;
+ if (isLocalCopy(&(*MII), LIS))
+ LocalWorkList.push_back(&(*MII));
+ else
+ WorkList.push_back(&(*MII));
+ }
+ }
+ else {
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII)
+ if (MII->isCopyLike())
+ WorkList.push_back(MII);
+ }
+ // Try coalescing the collected copies immediately, and remove the nulls.
+ // This prevents the WorkList from getting too large since most copies are
+ // joinable on the first attempt.
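+ //
+ // Sketch of the slicing below (contents invented): if WorkList held
+ // [c0, c1] before this block and [c2, c3] were appended above, PrevSize is
+ // 2 and CurrList views only [c2, c3]; entries nulled out by the joins are
+ // then erased from that tail without disturbing c0 and c1.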
+ MutableArrayRef<MachineInstr*>
+ CurrList(WorkList.begin() + PrevSize, WorkList.end());
+ if (copyCoalesceWorkList(CurrList))
+ WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
+ (MachineInstr*)0), WorkList.end());
+}
+
+void RegisterCoalescer::coalesceLocals() {
+ copyCoalesceWorkList(LocalWorkList);
+ for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
+ if (LocalWorkList[j])
+ WorkList.push_back(LocalWorkList[j]);
+ }
+ LocalWorkList.clear();
+}
+
+void RegisterCoalescer::joinAllIntervals() {
+ DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+ assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
+
+ std::vector<MBBPriorityInfo> MBBs;
+ MBBs.reserve(MF->size());
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
+ JoinSplitEdges && isSplitEdge(MBB)));
+ }
+ array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
+
+ // Coalesce intervals in MBB priority order.
+ unsigned CurrDepth = UINT_MAX;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ // Try coalescing the collected local copies for deeper loops.
+ if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
+ coalesceLocals();
+ CurrDepth = MBBs[i].Depth;
+ }
+ copyCoalesceInMBB(MBBs[i].MBB);
+ }
+ coalesceLocals();
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ while (copyCoalesceWorkList(WorkList))
+ /* empty */ ;
+}
+
+void RegisterCoalescer::releaseMemory() {
+ ErasedInstrs.clear();
+ WorkList.clear();
+ DeadDefs.clear();
+ InflateRegs.clear();
+}
+
+bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ MRI = &fn.getRegInfo();
+ TM = &fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ AA = &getAnalysis<AliasAnalysis>();
+ Loops = &getAnalysis<MachineLoopInfo>();
+
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (EnableGlobalCopies == cl::BOU_UNSET)
+ JoinGlobalCopies = ST.enableMachineScheduler();
+ else
+ JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
+
+ // The MachineScheduler does not currently require JoinSplitEdges. This will
+ // either be enabled unconditionally or replaced by a more general live range
+ // splitting optimization.
+ JoinSplitEdges = EnableJoinSplits;
+
+ DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: " << MF->getName() << '\n');
+
+ if (VerifyCoalescing)
+ MF->verify(this, "Before register coalescing");
+
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining)
+ joinAllIntervals();
+
+ // After deleting a lot of copies, register classes may be less constrained.
+ // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
+ // DPR inflation.
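+ //
+ // The sort+unique below is the usual deduplication idiom. Illustrative
+ // register numbers (invented): {8, 5, 8, 2} sorts to {2, 5, 8, 8}, and
+ // unique() compacts it to {2, 5, 8} before erase() trims the leftovers.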
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end()); + InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), + InflateRegs.end()); + DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); + for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { + unsigned Reg = InflateRegs[i]; + if (MRI->reg_nodbg_empty(Reg)) + continue; + if (MRI->recomputeRegClass(Reg, *TM)) { + DEBUG(dbgs() << PrintReg(Reg) << " inflated to " + << MRI->getRegClass(Reg)->getName() << '\n'); + ++NumInflated; + } + } + + DEBUG(dump()); + if (VerifyCoalescing) + MF->verify(this, "After register coalescing"); + return true; +} + +/// print - Implement the dump method. +void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { + LIS->print(O, m); +} diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h new file mode 100644 index 0000000..47c3df1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h @@ -0,0 +1,120 @@ +//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the abstract interface for register coalescers, +// allowing them to interact with and query register allocators. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H +#define LLVM_CODEGEN_REGISTER_COALESCER_H + +namespace llvm { + + class MachineInstr; + class TargetRegisterInfo; + class TargetRegisterClass; + class TargetInstrInfo; + + /// CoalescerPair - A helper class for register coalescers. When deciding if + /// two registers can be coalesced, CoalescerPair can determine if a copy + /// instruction would become an identity copy after coalescing. + class CoalescerPair { + const TargetRegisterInfo &TRI; + + /// DstReg - The register that will be left after coalescing. It can be a + /// virtual or physical register. + unsigned DstReg; + + /// SrcReg - the virtual register that will be coalesced into dstReg. + unsigned SrcReg; + + /// DstIdx - The sub-register index of the old DstReg in the new coalesced + /// register. + unsigned DstIdx; + + /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced + /// register. + unsigned SrcIdx; + + /// Partial - True when the original copy was a partial subregister copy. + bool Partial; + + /// CrossClass - True when both regs are virtual, and newRC is constrained. + bool CrossClass; + + /// Flipped - True when DstReg and SrcReg are reversed from the original + /// copy instruction. + bool Flipped; + + /// NewRC - The register class of the coalesced register, or NULL if DstReg + /// is a physreg. This register class may be a super-register of both + /// SrcReg and DstReg. + const TargetRegisterClass *NewRC; + + public: + CoalescerPair(const TargetRegisterInfo &tri) + : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), + Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + + /// Create a CoalescerPair representing a virtreg-to-physreg copy. + /// No need to call setRegisters(). 
+ CoalescerPair(unsigned VirtReg, unsigned PhysReg, + const TargetRegisterInfo &tri) + : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), + Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + + /// setRegisters - set registers to match the copy instruction MI. Return + /// false if MI is not a coalescable copy instruction. + bool setRegisters(const MachineInstr*); + + /// flip - Swap SrcReg and DstReg. Return false if swapping is impossible + /// because DstReg is a physical register, or SubIdx is set. + bool flip(); + + /// isCoalescable - Return true if MI is a copy instruction that will become + /// an identity copy after coalescing. + bool isCoalescable(const MachineInstr*) const; + + /// isPhys - Return true if DstReg is a physical register. + bool isPhys() const { return !NewRC; } + + /// isPartial - Return true if the original copy instruction did not copy + /// the full register, but was a subreg operation. + bool isPartial() const { return Partial; } + + /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller + /// register class than DstReg's. + bool isCrossClass() const { return CrossClass; } + + /// isFlipped - Return true when getSrcReg is the register being defined by + /// the original copy instruction. + bool isFlipped() const { return Flipped; } + + /// getDstReg - Return the register (virtual or physical) that will remain + /// after coalescing. + unsigned getDstReg() const { return DstReg; } + + /// getSrcReg - Return the virtual register that will be coalesced away. + unsigned getSrcReg() const { return SrcReg; } + + /// getDstIdx - Return the subregister index that DstReg will be coalesced + /// into, or 0. + unsigned getDstIdx() const { return DstIdx; } + + /// getSrcIdx - Return the subregister index that SrcReg will be coalesced + /// into, or 0. + unsigned getSrcIdx() const { return SrcIdx; } + + /// getNewRC - Return the register class of the coalesced register. + const TargetRegisterClass *getNewRC() const { return NewRC; } + }; +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp new file mode 100644 index 0000000..97f22e1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -0,0 +1,793 @@ +//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RegisterPressure class which can be used to track +// MachineInstr level register pressure. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +/// Increase pressure for each pressure set provided by TargetRegisterInfo. 
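+/// PSet is a -1 terminated list of pressure set IDs. For example (IDs and
+/// weights invented): with PSet = {0, 3, -1} and Weight = 2, sets 0 and 3
+/// each gain two units, and MaxSetPressure is raised wherever the new
+/// current pressure exceeds the recorded high-water mark.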
+static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, + std::vector<unsigned> &MaxSetPressure, + const int *PSet, unsigned Weight) { + for (; *PSet != -1; ++PSet) { + CurrSetPressure[*PSet] += Weight; + if (&CurrSetPressure != &MaxSetPressure + && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) { + MaxSetPressure[*PSet] = CurrSetPressure[*PSet]; + } + } +} + +/// Decrease pressure for each pressure set provided by TargetRegisterInfo. +static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, + const int *PSet, unsigned Weight) { + for (; *PSet != -1; ++PSet) { + assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow"); + CurrSetPressure[*PSet] -= Weight; + } +} + +/// Directly increase pressure only within this RegisterPressure result. +void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI, + const MachineRegisterInfo *MRI) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + increaseSetPressure(MaxSetPressure, MaxSetPressure, + TRI->getRegClassPressureSets(RC), + TRI->getRegClassWeight(RC).RegWeight); + } + else { + increaseSetPressure(MaxSetPressure, MaxSetPressure, + TRI->getRegUnitPressureSets(Reg), + TRI->getRegUnitWeight(Reg)); + } +} + +/// Directly decrease pressure only within this RegisterPressure result. +void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI, + const MachineRegisterInfo *MRI) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC), + TRI->getRegClassWeight(RC).RegWeight); + } + else { + decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg), + TRI->getRegUnitWeight(Reg)); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +static void dumpSetPressure(const std::vector<unsigned> &SetPressure, + const TargetRegisterInfo *TRI) { + for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) { + if (SetPressure[i] != 0) + dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n'; + } +} + +void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { + dbgs() << "Max Pressure: "; + dumpSetPressure(MaxSetPressure, TRI); + dbgs() << "Live In: "; + for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) + dbgs() << PrintReg(LiveInRegs[i], TRI) << " "; + dbgs() << '\n'; + dbgs() << "Live Out: "; + for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i) + dbgs() << PrintReg(LiveOutRegs[i], TRI) << " "; + dbgs() << '\n'; +} + +void RegPressureTracker::dump() const { + dbgs() << "Curr Pressure: "; + dumpSetPressure(CurrSetPressure, TRI); + P.dump(TRI); +} +#endif + +/// Increase the current pressure as impacted by these registers and bump +/// the high water mark if needed. +void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> Regs) { + for (unsigned I = 0, E = Regs.size(); I != E; ++I) { + if (TargetRegisterInfo::isVirtualRegister(Regs[I])) { + const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]); + increaseSetPressure(CurrSetPressure, P.MaxSetPressure, + TRI->getRegClassPressureSets(RC), + TRI->getRegClassWeight(RC).RegWeight); + } + else { + increaseSetPressure(CurrSetPressure, P.MaxSetPressure, + TRI->getRegUnitPressureSets(Regs[I]), + TRI->getRegUnitWeight(Regs[I])); + } + } +} + +/// Simply decrease the current pressure as impacted by these registers. 
+void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> Regs) { + for (unsigned I = 0, E = Regs.size(); I != E; ++I) { + if (TargetRegisterInfo::isVirtualRegister(Regs[I])) { + const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]); + decreaseSetPressure(CurrSetPressure, + TRI->getRegClassPressureSets(RC), + TRI->getRegClassWeight(RC).RegWeight); + } + else { + decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]), + TRI->getRegUnitWeight(Regs[I])); + } + } +} + +/// Clear the result so it can be used for another round of pressure tracking. +void IntervalPressure::reset() { + TopIdx = BottomIdx = SlotIndex(); + MaxSetPressure.clear(); + LiveInRegs.clear(); + LiveOutRegs.clear(); +} + +/// Clear the result so it can be used for another round of pressure tracking. +void RegionPressure::reset() { + TopPos = BottomPos = MachineBasicBlock::const_iterator(); + MaxSetPressure.clear(); + LiveInRegs.clear(); + LiveOutRegs.clear(); +} + +/// If the current top is not less than or equal to the next index, open it. +/// We happen to need the SlotIndex for the next top for pressure update. +void IntervalPressure::openTop(SlotIndex NextTop) { + if (TopIdx <= NextTop) + return; + TopIdx = SlotIndex(); + LiveInRegs.clear(); +} + +/// If the current top is the previous instruction (before receding), open it. +void RegionPressure::openTop(MachineBasicBlock::const_iterator PrevTop) { + if (TopPos != PrevTop) + return; + TopPos = MachineBasicBlock::const_iterator(); + LiveInRegs.clear(); +} + +/// If the current bottom is not greater than the previous index, open it. +void IntervalPressure::openBottom(SlotIndex PrevBottom) { + if (BottomIdx > PrevBottom) + return; + BottomIdx = SlotIndex(); + LiveInRegs.clear(); +} + +/// If the current bottom is the previous instr (before advancing), open it. +void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { + if (BottomPos != PrevBottom) + return; + BottomPos = MachineBasicBlock::const_iterator(); + LiveInRegs.clear(); +} + +const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return &LIS->getInterval(Reg); + return LIS->getCachedRegUnit(Reg); +} + +/// Setup the RegPressureTracker. +/// +/// TODO: Add support for pressure without LiveIntervals. +void RegPressureTracker::init(const MachineFunction *mf, + const RegisterClassInfo *rci, + const LiveIntervals *lis, + const MachineBasicBlock *mbb, + MachineBasicBlock::const_iterator pos) +{ + MF = mf; + TRI = MF->getTarget().getRegisterInfo(); + RCI = rci; + MRI = &MF->getRegInfo(); + MBB = mbb; + + if (RequireIntervals) { + assert(lis && "IntervalPressure requires LiveIntervals"); + LIS = lis; + } + + CurrPos = pos; + CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0); + + if (RequireIntervals) + static_cast<IntervalPressure&>(P).reset(); + else + static_cast<RegionPressure&>(P).reset(); + P.MaxSetPressure = CurrSetPressure; + + LiveRegs.PhysRegs.clear(); + LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs()); + LiveRegs.VirtRegs.clear(); + LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs()); +} + +/// Does this pressure result have a valid top position and live ins. +bool RegPressureTracker::isTopClosed() const { + if (RequireIntervals) + return static_cast<IntervalPressure&>(P).TopIdx.isValid(); + return (static_cast<RegionPressure&>(P).TopPos == + MachineBasicBlock::const_iterator()); +} + +/// Does this pressure result have a valid bottom position and live outs. 
+bool RegPressureTracker::isBottomClosed() const { + if (RequireIntervals) + return static_cast<IntervalPressure&>(P).BottomIdx.isValid(); + return (static_cast<RegionPressure&>(P).BottomPos == + MachineBasicBlock::const_iterator()); +} + + +SlotIndex RegPressureTracker::getCurrSlot() const { + MachineBasicBlock::const_iterator IdxPos = CurrPos; + while (IdxPos != MBB->end() && IdxPos->isDebugValue()) + ++IdxPos; + if (IdxPos == MBB->end()) + return LIS->getMBBEndIdx(MBB); + return LIS->getInstructionIndex(IdxPos).getRegSlot(); +} + +/// Set the boundary for the top of the region and summarize live ins. +void RegPressureTracker::closeTop() { + if (RequireIntervals) + static_cast<IntervalPressure&>(P).TopIdx = getCurrSlot(); + else + static_cast<RegionPressure&>(P).TopPos = CurrPos; + + assert(P.LiveInRegs.empty() && "inconsistent max pressure result"); + P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size()); + P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end()); + for (SparseSet<unsigned>::const_iterator I = + LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I) + P.LiveInRegs.push_back(*I); + std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end()); + P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()), + P.LiveInRegs.end()); +} + +/// Set the boundary for the bottom of the region and summarize live outs. +void RegPressureTracker::closeBottom() { + if (RequireIntervals) + static_cast<IntervalPressure&>(P).BottomIdx = getCurrSlot(); + else + static_cast<RegionPressure&>(P).BottomPos = CurrPos; + + assert(P.LiveOutRegs.empty() && "inconsistent max pressure result"); + P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size()); + P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end()); + for (SparseSet<unsigned>::const_iterator I = + LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I) + P.LiveOutRegs.push_back(*I); + std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end()); + P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()), + P.LiveOutRegs.end()); +} + +/// Finalize the region boundaries and record live ins and live outs. +void RegPressureTracker::closeRegion() { + if (!isTopClosed() && !isBottomClosed()) { + assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() && + "no region boundary"); + return; + } + if (!isBottomClosed()) + closeBottom(); + else if (!isTopClosed()) + closeTop(); + // If both top and bottom are closed, do nothing. +} + +/// \brief Convenient wrapper for checking membership in RegisterOperands. +static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) { + return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end(); +} + +/// Collect this instruction's unique uses and defs into SmallVectors for +/// processing defs and uses in order. +class RegisterOperands { + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + +public: + SmallVector<unsigned, 8> Uses; + SmallVector<unsigned, 8> Defs; + SmallVector<unsigned, 8> DeadDefs; + + RegisterOperands(const TargetRegisterInfo *tri, + const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {} + + /// Push this operand's register onto the correct vector. 
+ void collect(const MachineOperand &MO) { + if (!MO.isReg() || !MO.getReg()) + return; + if (MO.readsReg()) + pushRegUnits(MO.getReg(), Uses); + if (MO.isDef()) { + if (MO.isDead()) + pushRegUnits(MO.getReg(), DeadDefs); + else + pushRegUnits(MO.getReg(), Defs); + } + } + +protected: + void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &Regs) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (containsReg(Regs, Reg)) + return; + Regs.push_back(Reg); + } + else if (MRI->isAllocatable(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (containsReg(Regs, *Units)) + continue; + Regs.push_back(*Units); + } + } + } +}; + +/// Collect physical and virtual register operands. +static void collectOperands(const MachineInstr *MI, + RegisterOperands &RegOpers) { + for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) + RegOpers.collect(*OperI); + + // Remove redundant physreg dead defs. + SmallVectorImpl<unsigned>::iterator I = + std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(), + std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs)); + RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); +} + +/// Force liveness of registers. +void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) { + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + if (LiveRegs.insert(Regs[i])) + increaseRegPressure(Regs[i]); + } +} + +/// Add Reg to the live in set and increase max pressure. +void RegPressureTracker::discoverLiveIn(unsigned Reg) { + assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice"); + if (containsReg(P.LiveInRegs, Reg)) + return; + + // At live in discovery, unconditionally increase the high water mark. + P.LiveInRegs.push_back(Reg); + P.increase(Reg, TRI, MRI); +} + +/// Add Reg to the live out set and increase max pressure. +void RegPressureTracker::discoverLiveOut(unsigned Reg) { + assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice"); + if (containsReg(P.LiveOutRegs, Reg)) + return; + + // At live out discovery, unconditionally increase the high water mark. + P.LiveOutRegs.push_back(Reg); + P.increase(Reg, TRI, MRI); +} + +/// Recede across the previous instruction. +bool RegPressureTracker::recede() { + // Check for the top of the analyzable region. + if (CurrPos == MBB->begin()) { + closeRegion(); + return false; + } + if (!isBottomClosed()) + closeBottom(); + + // Open the top of the region using block iterators. + if (!RequireIntervals && isTopClosed()) + static_cast<RegionPressure&>(P).openTop(CurrPos); + + // Find the previous instruction. + do + --CurrPos; + while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + + if (CurrPos->isDebugValue()) { + closeRegion(); + return false; + } + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + + // Open the top of the region using slot indexes. + if (RequireIntervals && isTopClosed()) + static_cast<IntervalPressure&>(P).openTop(SlotIdx); + + RegisterOperands RegOpers(TRI, MRI); + collectOperands(CurrPos, RegOpers); + + // Boost pressure for all dead defs together. + increaseRegPressure(RegOpers.DeadDefs); + decreaseRegPressure(RegOpers.DeadDefs); + + // Kill liveness at live defs. + // TODO: consider earlyclobbers? + for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = RegOpers.Defs[i]; + if (LiveRegs.erase(Reg)) + decreaseRegPressure(Reg); + else + discoverLiveOut(Reg); + } + + // Generate liveness for uses. 
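+ // (A use that is not already live here must be live above this
+ // instruction; if LiveIntervals says the register is not killed at this
+ // slot, it was also live below, i.e. a live-out we had not yet recorded.)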
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = RegOpers.Uses[i]; + if (!LiveRegs.contains(Reg)) { + // Adjust liveouts if LiveIntervals are available. + if (RequireIntervals) { + const LiveInterval *LI = getInterval(Reg); + if (LI && !LI->killedAt(SlotIdx)) + discoverLiveOut(Reg); + } + increaseRegPressure(Reg); + LiveRegs.insert(Reg); + } + } + return true; +} + +/// Advance across the current instruction. +bool RegPressureTracker::advance() { + // Check for the bottom of the analyzable region. + if (CurrPos == MBB->end()) { + closeRegion(); + return false; + } + if (!isTopClosed()) + closeTop(); + + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = getCurrSlot(); + + // Open the bottom of the region using slot indexes. + if (isBottomClosed()) { + if (RequireIntervals) + static_cast<IntervalPressure&>(P).openBottom(SlotIdx); + else + static_cast<RegionPressure&>(P).openBottom(CurrPos); + } + + RegisterOperands RegOpers(TRI, MRI); + collectOperands(CurrPos, RegOpers); + + for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = RegOpers.Uses[i]; + // Discover live-ins. + bool isLive = LiveRegs.contains(Reg); + if (!isLive) + discoverLiveIn(Reg); + // Kill liveness at last uses. + bool lastUse = false; + if (RequireIntervals) { + const LiveInterval *LI = getInterval(Reg); + lastUse = LI && LI->killedAt(SlotIdx); + } + else { + // Allocatable physregs are always single-use before register rewriting. + lastUse = !TargetRegisterInfo::isVirtualRegister(Reg); + } + if (lastUse && isLive) { + LiveRegs.erase(Reg); + decreaseRegPressure(Reg); + } + else if (!lastUse && !isLive) + increaseRegPressure(Reg); + } + + // Generate liveness for defs. + for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = RegOpers.Defs[i]; + if (LiveRegs.insert(Reg)) + increaseRegPressure(Reg); + } + + // Boost pressure for all dead defs together. + increaseRegPressure(RegOpers.DeadDefs); + decreaseRegPressure(RegOpers.DeadDefs); + + // Find the next instruction. + do + ++CurrPos; + while (CurrPos != MBB->end() && CurrPos->isDebugValue()); + return true; +} + +/// Find the max change in excess pressure across all sets. +static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, + ArrayRef<unsigned> NewPressureVec, + RegPressureDelta &Delta, + const TargetRegisterInfo *TRI) { + int ExcessUnits = 0; + unsigned PSetID = ~0U; + for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) { + unsigned POld = OldPressureVec[i]; + unsigned PNew = NewPressureVec[i]; + int PDiff = (int)PNew - (int)POld; + if (!PDiff) // No change in this set in the common case. + continue; + // Only consider change beyond the limit. + unsigned Limit = TRI->getRegPressureSetLimit(i); + if (Limit > POld) { + if (Limit > PNew) + PDiff = 0; // Under the limit + else + PDiff = PNew - Limit; // Just exceeded limit. + } + else if (Limit > PNew) + PDiff = Limit - POld; // Just obeyed limit. + + if (std::abs(PDiff) > std::abs(ExcessUnits)) { + ExcessUnits = PDiff; + PSetID = i; + } + } + Delta.Excess.PSetID = PSetID; + Delta.Excess.UnitIncrease = ExcessUnits; +} + +/// Find the max change in max pressure that either surpasses a critical PSet +/// limit or exceeds the current MaxPressureLimit. +/// +/// FIXME: comparing each element of the old and new MaxPressure vectors here is +/// silly. It's done now to demonstrate the concept but will go away with a +/// RegPressureTracker API change to work with pressure differences. 
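+///
+/// (Both deltas computed here are monotone: an entry is only recorded when
+/// the new max pressure exceeds the reference value, so UnitIncrease stays
+/// non-negative, matching the asserts in the callers below.)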
+static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
+ ArrayRef<unsigned> NewMaxPressureVec,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit,
+ RegPressureDelta &Delta) {
+ Delta.CriticalMax = PressureElement();
+ Delta.CurrentMax = PressureElement();
+
+ unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+ for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldMaxPressureVec[i];
+ unsigned PNew = NewMaxPressureVec[i];
+ if (PNew == POld) // No change in this set in the common case.
+ continue;
+
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i)
+ ++CritIdx;
+
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) {
+ int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease;
+ if (PDiff > Delta.CriticalMax.UnitIncrease) {
+ Delta.CriticalMax.PSetID = i;
+ Delta.CriticalMax.UnitIncrease = PDiff;
+ }
+ }
+
+ // Find the greatest increase above MaxPressureLimit.
+ // (Ignores negative MDiff).
+ int MDiff = (int)PNew - (int)MaxPressureLimit[i];
+ if (MDiff > Delta.CurrentMax.UnitIncrease) {
+ Delta.CurrentMax.PSetID = i;
+ Delta.CurrentMax.UnitIncrease = MDiff;
+ }
+ }
+}
+
+/// Record the upward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(MI, RegOpers);
+
+ // Boost max pressure for all dead defs together.
+ // Since CurrSetPressure and MaxSetPressure are updated together, the
+ // increase/decrease pair below leaves CurrSetPressure unchanged while
+ // recording the transient peak in MaxSetPressure.
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (!containsReg(RegOpers.Uses, Reg))
+ decreaseRegPressure(Reg);
+ }
+ // Generate liveness for uses.
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (!LiveRegs.contains(Reg))
+ increaseRegPressure(Reg);
+ }
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// bottom-up. Find the pressure set with the most change beyond its pressure
+/// limit based on the tracker's current pressure, and return the change in
+/// number of register units of that pressure set introduced by this
+/// instruction.
+///
+/// This assumes that the current LiveOut set is sufficient.
+///
+/// FIXME: This is expensive for an on-the-fly query. We need to cache the
+/// result per-SUnit with enough information to adjust for the current
+/// scheduling position. But this works as a proof of concept.
+void RegPressureTracker::
+getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ // FIXME: The snapshot heap space should persist. But I'm planning to
+ // summarize the pressure effect so we don't need to snapshot at all.
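+ // (The save/bump/swap sequence below keeps this query free of side
+ // effects: the saved vectors are swapped back before returning.)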
+ std::vector<unsigned> SavedPressure = CurrSetPressure; + std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure; + + bumpUpwardPressure(MI); + + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, + MaxPressureLimit, Delta); + assert(Delta.CriticalMax.UnitIncrease >= 0 && + Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + + // Restore the tracker's state. + P.MaxSetPressure.swap(SavedMaxPressure); + CurrSetPressure.swap(SavedPressure); +} + +/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). +static bool findUseBetween(unsigned Reg, + SlotIndex PriorUseIdx, SlotIndex NextUseIdx, + const MachineRegisterInfo *MRI, + const LiveIntervals *LIS) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end(); + UI != UE; UI.skipInstruction()) { + const MachineInstr* MI = &*UI; + if (MI->isDebugValue()) + continue; + SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot(); + if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) + return true; + } + return false; +} + +/// Record the downward impact of a single instruction on current register +/// pressure. Unlike the advance/recede pressure tracking interface, this does +/// not discover live in/outs. +/// +/// This is intended for speculative queries. It leaves pressure inconsistent +/// with the current position, so must be restored by the caller. +void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { + assert(!MI->isDebugValue() && "Expect a nondebug instruction."); + + // Account for register pressure similar to RegPressureTracker::recede(). + RegisterOperands RegOpers(TRI, MRI); + collectOperands(MI, RegOpers); + + // Kill liveness at last uses. Assume allocatable physregs are single-use + // rather than checking LiveIntervals. + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); + + for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = RegOpers.Uses[i]; + if (RequireIntervals) { + // FIXME: allow the caller to pass in the list of vreg uses that remain + // to be bottom-scheduled to avoid searching uses at each query. + SlotIndex CurrIdx = getCurrSlot(); + const LiveInterval *LI = getInterval(Reg); + if (LI && LI->killedAt(SlotIdx) + && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { + decreaseRegPressure(Reg); + } + } + else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Allocatable physregs are always single-use before register rewriting. + decreaseRegPressure(Reg); + } + } + + // Generate liveness for defs. + increaseRegPressure(RegOpers.Defs); + + // Boost pressure for all dead defs together. + increaseRegPressure(RegOpers.DeadDefs); + decreaseRegPressure(RegOpers.DeadDefs); +} + +/// Consider the pressure increase caused by traversing this instruction +/// top-down. Find the register class with the most change in its pressure limit +/// based on the tracker's current pressure, and return the number of excess +/// register units of that pressure set introduced by this instruction. +/// +/// This assumes that the current LiveIn set is sufficient. +void RegPressureTracker:: +getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, + ArrayRef<PressureElement> CriticalPSets, + ArrayRef<unsigned> MaxPressureLimit) { + // Snapshot Pressure. 
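+ // (Same save/restore discipline as getMaxUpwardPressureDelta above.)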
+ std::vector<unsigned> SavedPressure = CurrSetPressure; + std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure; + + bumpDownwardPressure(MI); + + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, + MaxPressureLimit, Delta); + assert(Delta.CriticalMax.UnitIncrease >= 0 && + Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + + // Restore the tracker's state. + P.MaxSetPressure.swap(SavedMaxPressure); + CurrSetPressure.swap(SavedPressure); +} + +/// Get the pressure of each PSet after traversing this instruction bottom-up. +void RegPressureTracker:: +getUpwardPressure(const MachineInstr *MI, + std::vector<unsigned> &PressureResult, + std::vector<unsigned> &MaxPressureResult) { + // Snapshot pressure. + PressureResult = CurrSetPressure; + MaxPressureResult = P.MaxSetPressure; + + bumpUpwardPressure(MI); + + // Current pressure becomes the result. Restore current pressure. + P.MaxSetPressure.swap(MaxPressureResult); + CurrSetPressure.swap(PressureResult); +} + +/// Get the pressure of each PSet after traversing this instruction top-down. +void RegPressureTracker:: +getDownwardPressure(const MachineInstr *MI, + std::vector<unsigned> &PressureResult, + std::vector<unsigned> &MaxPressureResult) { + // Snapshot pressure. + PressureResult = CurrSetPressure; + MaxPressureResult = P.MaxSetPressure; + + bumpDownwardPressure(MI); + + // Current pressure becomes the result. Restore current pressure. + P.MaxSetPressure.swap(MaxPressureResult); + CurrSetPressure.swap(PressureResult); +} diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp new file mode 100644 index 0000000..07ace7a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -0,0 +1,443 @@ +//===-- RegisterScavenging.cpp - Machine register scavenging --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine register scavenger. It can provide +// information, such as unused registers, at any point in a machine basic block. +// It also provides a mechanism to make registers available by evicting them to +// spill slots. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "reg-scavenging" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +/// setUsed - Set the register and its sub-registers as being used. 
+void RegScavenger::setUsed(unsigned Reg) { + RegsAvailable.reset(Reg); + + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + RegsAvailable.reset(*SubRegs); +} + +bool RegScavenger::isAliasUsed(unsigned Reg) const { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + if (isUsed(*AI, *AI == Reg)) + return true; + return false; +} + +void RegScavenger::initRegState() { + for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + I->Reg = 0; + I->Restore = NULL; + } + + // All registers started out unused. + RegsAvailable.set(); + + if (!MBB) + return; + + // Live-in registers are in use. + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + setUsed(*I); + + // Pristine CSRs are also unavailable. + BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) + setUsed(I); +} + +void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { + MachineFunction &MF = *mbb->getParent(); + const TargetMachine &TM = MF.getTarget(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + MRI = &MF.getRegInfo(); + + assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && + "Target changed?"); + + // It is not possible to use the register scavenger after late optimization + // passes that don't preserve accurate liveness information. + assert(MRI->tracksLiveness() && + "Cannot use register scavenger with inaccurate liveness"); + + // Self-initialize. + if (!MBB) { + NumPhysRegs = TRI->getNumRegs(); + RegsAvailable.resize(NumPhysRegs); + KillRegs.resize(NumPhysRegs); + DefRegs.resize(NumPhysRegs); + + // Create callee-saved registers bitvector. + CalleeSavedRegs.resize(NumPhysRegs); + const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); + if (CSRegs != NULL) + for (unsigned i = 0; CSRegs[i]; ++i) + CalleeSavedRegs.set(CSRegs[i]); + } + + MBB = mbb; + initRegState(); + + Tracking = false; +} + +void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + BV.set(*SubRegs); +} + +void RegScavenger::determineKillsAndDefs() { + assert(Tracking && "Must be tracking to determine kills and defs"); + + MachineInstr *MI = MBBI; + assert(!MI->isDebugValue() && "Debug values have no kills or defs"); + + // Find out which registers are early clobbered, killed, defined, and marked + // def-dead in this instruction. + // FIXME: The scavenger is not predication aware. If the instruction is + // predicated, conservatively assume "kill" markers do not actually kill the + // register. Similarly ignores "dead" markers. + bool isPred = TII->isPredicated(MI); + KillRegs.reset(); + DefRegs.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask()); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) + continue; + + if (MO.isUse()) { + // Ignore undef uses. 
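+ // (An <undef> read places no demand on the register's current contents,
+ // so it is not treated as a kill here.)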
+ if (MO.isUndef())
+ continue;
+ if (!isPred && MO.isKill())
+ addRegWithSubRegs(KillRegs, Reg);
+ } else {
+ assert(MO.isDef());
+ if (!isPred && MO.isDead())
+ addRegWithSubRegs(KillRegs, Reg);
+ else
+ addRegWithSubRegs(DefRegs, Reg);
+ }
+ }
+}
+
+void RegScavenger::unprocess() {
+ assert(Tracking && "Cannot unprocess because we're not tracking");
+
+ MachineInstr *MI = MBBI;
+ if (MI->isDebugValue())
+ return;
+
+ determineKillsAndDefs();
+
+ // Commit the changes.
+ setUsed(KillRegs);
+ setUnused(DefRegs);
+
+ if (MBBI == MBB->begin()) {
+ MBBI = MachineBasicBlock::iterator(NULL);
+ Tracking = false;
+ } else
+ --MBBI;
+}
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already past the end of the basic block!");
+ MBBI = llvm::next(MBBI);
+ }
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+
+ MachineInstr *MI = MBBI;
+
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ if (I->Restore != MI)
+ continue;
+
+ I->Reg = 0;
+ I->Restore = NULL;
+ }
+
+ if (MI->isDebugValue())
+ return;
+
+ determineKillsAndDefs();
+
+ // Verify uses and defs.
+#ifndef NDEBUG
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
+ continue;
+ if (MO.isUse()) {
+ if (MO.isUndef())
+ continue;
+ if (!isUsed(Reg)) {
+ // Check if it's partially live: e.g.
+ // D0 = insert_subreg D0<undef>, S0
+ // ... D0
+ // The problem is the insert_subreg could be eliminated. The use of
+ // D0 is using a partially undef value. This is not *incorrect* since
+ // S1 can be freely clobbered.
+ // Ideally we would like a way to model this, but leaving the
+ // insert_subreg around causes both correctness and performance issues.
+ bool SubUsed = false;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ if (isUsed(*SubRegs)) {
+ SubUsed = true;
+ break;
+ }
+ if (!SubUsed) {
+ MBB->getParent()->verify(NULL, "In Register Scavenger");
+ llvm_unreachable("Using an undefined register!");
+ }
+ (void)SubUsed;
+ }
+ } else {
+ assert(MO.isDef());
+#if 0
+ // FIXME: Enable this once we've figured out how to correctly transfer
+ // implicit kills during codegen passes like the coalescer.
+ assert((KillRegs.test(Reg) || isUnused(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+#endif
+ }
+ }
+#endif // NDEBUG
+
+ // Commit the changes.
+ setUnused(KillRegs);
+ setUsed(DefRegs);
+}
+
+void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ used = RegsAvailable;
+ used.flip();
+ if (includeReserved)
+ used |= MRI->getReservedRegs();
+ else
+ used.reset(MRI->getReservedRegs());
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I)) {
+ DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
+ "\n");
+ return *I;
+ }
+ return 0;
+}
+
+/// getRegsAvailable - Return all available registers in the register class
+/// in Mask.
+BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
+ BitVector Mask(TRI->getNumRegs());
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I))
+ Mask.set(*I);
+ return Mask;
+}
+
+/// findSurvivorReg - Return the candidate register that is unused for the
+/// longest after StartMI. UseMI is set to the instruction where the search
+/// stopped.
+///
+/// No more than InstrLimit instructions are inspected.
+///
+unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
+ BitVector &Candidates,
+ unsigned InstrLimit,
+ MachineBasicBlock::iterator &UseMI) {
+ int Survivor = Candidates.find_first();
+ assert(Survivor > 0 && "No candidates for scavenging");
+
+ MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
+ assert(StartMI != ME && "MI already at terminator");
+ MachineBasicBlock::iterator RestorePointMI = StartMI;
+ MachineBasicBlock::iterator MI = StartMI;
+
+ bool inVirtLiveRange = false;
+ for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ if (MI->isDebugValue()) {
+ ++InstrLimit; // Don't count debug instructions
+ continue;
+ }
+ bool isVirtKillInsn = false;
+ bool isVirtDefInsn = false;
+ // Remove any candidates touched by instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ Candidates.clearBitsNotInMask(MO.getRegMask());
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isDef())
+ isVirtDefInsn = true;
+ else if (MO.isKill())
+ isVirtKillInsn = true;
+ continue;
+ }
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
+ }
+ // If we're not in a virtual reg's live range, this is a valid
+ // restore point.
+ if (!inVirtLiveRange) RestorePointMI = MI;
+
+ // Update whether we're in the live range of a virtual register
+ if (isVirtKillInsn) inVirtLiveRange = false;
+ if (isVirtDefInsn) inVirtLiveRange = true;
+
+ // Was our survivor untouched by this instruction?
+ if (Candidates.test(Survivor))
+ continue;
+
+ // All candidates gone?
+ if (Candidates.none())
+ break;
+
+ Survivor = Candidates.find_first();
+ }
+ // If we ran off the end, that's where we want to restore.
+ if (MI == ME) RestorePointMI = ME;
+ assert(RestorePointMI != StartMI &&
+ "No available scavenger restore location!");
+
+ // We ran out of candidates, so stop the search.
+ UseMI = RestorePointMI;
+ return Survivor;
+}
+
+static unsigned getFrameIndexOperandNum(MachineInstr *MI) {
+ unsigned i = 0;
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ return i;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ // Consider all allocatable registers in the register class initially.
+ BitVector Candidates =
+ TRI->getAllocatableSet(*I->getParent()->getParent(), RC);
+
+ // Exclude all the registers being used by the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = I->getOperand(i);
+ if (MO.isReg() && MO.getReg() != 0 &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ Candidates.reset(MO.getReg());
+ }
+
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill. Search explicitly rather than masking out based on
+ // RegsAvailable, as RegsAvailable does not take aliases into account.
+ // That's what getRegsAvailable() is for.
+ BitVector Available = getRegsAvailable(RC);
+ Available &= Candidates;
+ if (Available.any())
+ Candidates = Available;
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+ // If we found an unused register there is no reason to spill it.
+ if (!isAliasUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+ return SReg;
+ }
+
+ // Find an available scavenging slot.
+ unsigned SI;
+ for (SI = 0; SI < Scavenged.size(); ++SI)
+ if (Scavenged[SI].Reg == 0)
+ break;
+
+ if (SI == Scavenged.size()) {
+ // We need to scavenge a register but have no spill slot; the target
+ // must know how to do it (if not, we'll assert below).
+ Scavenged.push_back(ScavengedInfo());
+ }
+
+ // Avoid infinite regress
+ Scavenged[SI].Reg = SReg;
+
+ // If the target knows how to save/restore the register, let it do so;
+ // otherwise, use the emergency stack spill slot.
+ if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
+ // Spill the scavenged register before I.
+ assert(Scavenged[SI].FrameIndex >= 0 &&
+ "Cannot scavenge register without an emergency spill slot!");
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
+ RC, TRI);
+ MachineBasicBlock::iterator II = prior(I);
+
+ unsigned FIOperandNum = getFrameIndexOperandNum(II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
+ RC, TRI);
+ II = prior(UseMI);
+
+ FIOperandNum = getFrameIndexOperandNum(II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
+ }
+
+ Scavenged[SI].Restore = prior(UseMI);
+
+ // Doing this here leads to infinite regress.
+ // Scavenged[SI].Reg = SReg;
+
+ DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+ "\n");
+
+ return SReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 0000000..07e5b47
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,642 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool> StressSchedOpt(
+ "stress-sched", cl::Hidden, cl::init(false),
+ cl::desc("Stress test instruction scheduling"));
+#endif
+
+void SchedulingPriorityQueue::anchor() { }
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+ : TM(mf.getTarget()),
+ TII(TM.getInstrInfo()),
+ TRI(TM.getRegisterInfo()),
+ MF(mf), MRI(mf.getRegInfo()),
+ EntrySU(), ExitSU() {
+#ifndef NDEBUG
+ StressSched = StressSchedOpt;
+#endif
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// Clear the DAG state (e.g. between scheduling regions).
+void ScheduleDAG::clearDAG() {
+ SUnits.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+}
+
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// it is not already present. It also adds the current node as a successor
+/// of the specified node.
+bool SUnit::addPred(const SDep &D, bool Required) {
+ // If this node already has this dependence, don't add a redundant one.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ // Zero-latency weak edges may be added purely for heuristic ordering. Don't
+ // add them if another kind of edge already exists.
+ if (!Required && I->getSUnit() == D.getSUnit())
+ return false;
+ if (I->overlaps(D)) {
+ // Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
+ if (I->getLatency() < D.getLatency()) {
+ SUnit *PredSU = I->getSUnit();
+ // Find the corresponding successor in N.
+ SDep ForwardD = *I;
+ ForwardD.setSUnit(this);
+ for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ if (*II == ForwardD) {
+ II->setLatency(D.getLatency());
+ break;
+ }
+ }
+ I->setLatency(D.getLatency());
+ }
+ return false;
+ }
+ }
+ // Now add a corresponding succ to N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ // Update the bookkeeping.
+ if (D.getKind() == SDep::Data) {
+ assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
+ assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+ ++NumPreds;
+ ++N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak()) {
+ ++WeakPredsLeft;
+ }
+ else {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
+ }
+ if (!isScheduled) {
+ if (D.isWeak()) {
+ ++N->WeakSuccsLeft;
+ }
+ else {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
+ }
+ Preds.push_back(D);
+ N->Succs.push_back(P);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return true;
+}
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+ // Find the matching predecessor.
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D) {
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(),
+ N->Succs.end(), P);
+ assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
+ N->Succs.erase(Succ);
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak())
+ --WeakPredsLeft;
+ else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
+ }
+ if (!isScheduled) {
+ if (D.isWeak())
+ --N->WeakSuccsLeft;
+ else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ }
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return;
+ }
+}
+
+void SUnit::setDepthDirty() {
+ if (!isDepthCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isDepthCurrent = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(),
+ E = SU->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isDepthCurrent)
+ WorkList.push_back(SuccSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+ if (!isHeightCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isHeightCurrent = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),
+ E = SU->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isHeightCurrent)
+ WorkList.push_back(PredSU);
+ }
+ } while (!WorkList.empty());
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the node to the entry.
+///
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+ E = Cur->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+ E = Cur->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::biasCriticalPath() {
+ if (NumPreds < 2)
+ return;
+
+ SUnit::pred_iterator BestI = Preds.begin();
+ unsigned MaxDepth = BestI->getSUnit()->getDepth();
+ for (SUnit::pred_iterator
+ I = llvm::next(BestI), E = Preds.end(); I != E; ++I) {
+ if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) {
+ MaxDepth = I->getSUnit()->getDepth();
+ BestI = I;
+ }
+ }
+ if (BestI != Preds.begin())
+ std::swap(*Preds.begin(), *BestI);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+ dbgs() << "SU(" << NodeNum << "): ";
+ G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+ dump(G);
+
+ dbgs() << " # preds left : " << NumPredsLeft << "\n";
+ dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ if (WeakPredsLeft)
+ dbgs() << " # weak preds left : " << WeakPredsLeft << "\n";
+ if (WeakSuccsLeft)
+ dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
+ dbgs() << " Latency : " << Latency << "\n";
+ dbgs() << " Depth : " << getDepth() << "\n";
+ dbgs() << " Height : " << getHeight() << "\n";
+
+ if (Preds.size() != 0) {
+ dbgs() << " Predecessors:\n";
+ for (SUnit::const_pred_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << "\n";
+ }
+ }
+ if (Succs.size() != 0) {
+ dbgs() << " Successors:\n";
+ for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << "\n";
+ }
+ }
+ dbgs() << "\n";
+}
+#endif
+
+#ifndef NDEBUG
+/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+/// their state is consistent. Return the number of scheduled nodes.
+/// +unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { + bool AnyNotSched = false; + unsigned DeadNodes = 0; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (!SUnits[i].isScheduled) { + if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { + ++DeadNodes; + continue; + } + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has not been scheduled!\n"; + AnyNotSched = true; + } + if (SUnits[i].isScheduled && + (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) > + unsigned(INT_MAX)) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has an unexpected " + << (isBottomUp ? "Height" : "Depth") << " value!\n"; + AnyNotSched = true; + } + if (isBottomUp) { + if (SUnits[i].NumSuccsLeft != 0) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has successors left!\n"; + AnyNotSched = true; + } + } else { + if (SUnits[i].NumPredsLeft != 0) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has predecessors left!\n"; + AnyNotSched = true; + } + } + } + assert(!AnyNotSched); + return SUnits.size() - DeadNodes; +} +#endif + +/// InitDAGTopologicalSorting - create the initial topological +/// ordering from the DAG to be scheduled. +/// +/// The idea of the algorithm is taken from +/// "Online algorithms for managing the topological order of +/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly +/// This is the MNR algorithm, which was first introduced by +/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in +/// "Maintaining a topological order under edge insertions". +/// +/// Short description of the algorithm: +/// +/// Topological ordering, ord, of a DAG maps each node to a topological +/// index so that for all edges X->Y it is the case that ord(X) < ord(Y). +/// +/// This means that if there is a path from the node X to the node Z, +/// then ord(X) < ord(Z). +/// +/// This property can be used to check for reachability of nodes: +/// if Z is reachable from X, then an insertion of the edge Z->X would +/// create a cycle. +/// +/// The algorithm first computes a topological ordering for the DAG by +/// initializing the Index2Node and Node2Index arrays and then tries to keep +/// the ordering up-to-date after edge insertions by reordering the DAG. +/// +/// On insertion of the edge X->Y, the algorithm first marks by calling DFS +/// the nodes reachable from Y, and then shifts them using Shift to lie +/// immediately after X in Index2Node. +void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { + unsigned DAGSize = SUnits.size(); + std::vector<SUnit*> WorkList; + WorkList.reserve(DAGSize); + + Index2Node.resize(DAGSize); + Node2Index.resize(DAGSize); + + // Initialize the data structures. + if (ExitSU) + WorkList.push_back(ExitSU); + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + int NodeNum = SU->NodeNum; + unsigned Degree = SU->Succs.size(); + // Temporarily use the Node2Index array as scratch space for degree counts. + Node2Index[NodeNum] = Degree; + + // Is it a node without dependencies? + if (Degree == 0) { + assert(SU->Succs.empty() && "SUnit should have no successors"); + // Collect leaf nodes. 
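+ // (Nodes with no successors seed the worklist; topological indices are
+ // then handed out in decreasing order as the DAG is peeled from these
+ // leaves toward the roots.)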
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ if (SU->NodeNum < DAGSize)
+ Allocate(SU->NodeNum, --Id);
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->NodeNum < DAGSize && !--Node2Index[PredSU->NodeNum])
+ // If all dependencies of the node are processed already,
+ // then the node can be computed now.
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ "Wrong topological sorting");
+ }
+ }
+#endif
+}
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[Y->NodeNum];
+ UpperBound = Node2Index[X->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(Y) < Ord(X)?
+ if (LowerBound < UpperBound) {
+ // Update the topological order.
+ Visited.reset();
+ DFS(Y, UpperBound, HasLoop);
+ assert(!HasLoop && "Inserted edge creates a loop!");
+ // Recompute topological indexes.
+ Shift(Visited, LowerBound, UpperBound);
+ }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate
+/// an edge to be removed from the specified node N from the predecessors
+/// of the current node M.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+ // InitDAGTopologicalSorting();
+}
+
+/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
+/// all nodes affected by the edge insertion. These nodes will later get new
+/// topological indexes by means of the Shift method.
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+ bool &HasLoop) {
+ std::vector<const SUnit*> WorkList;
+ WorkList.reserve(SUnits.size());
+
+ WorkList.push_back(SU);
+ do {
+ SU = WorkList.back();
+ WorkList.pop_back();
+ Visited.set(SU->NodeNum);
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ unsigned s = SU->Succs[I].getSUnit()->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (s >= Node2Index.size())
+ continue;
+ if (Node2Index[s] == UpperBound) {
+ HasLoop = true;
+ return;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ WorkList.push_back(SU->Succs[I].getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+/// Shift - Renumber the nodes so that the topological ordering is
+/// preserved.
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+ int UpperBound) {
+ std::vector<int> L;
+ int shift = 0;
+ int i;
+
+ for (i = LowerBound; i <= UpperBound; ++i) {
+ // w is node at topological index i.
+ int w = Index2Node[i];
+ if (Visited.test(w)) {
+ // Unmark.
+ Visited.reset(w);
+ L.push_back(w);
+ shift = shift + 1;
+ } else {
+ Allocate(w, i - shift);
+ }
+ }
+
+ for (unsigned j = 0; j < L.size(); ++j) {
+ Allocate(L[j], i - shift);
+ i = i + 1;
+ }
+}
+
+
+/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
+/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { + // Is SU reachable from TargetSU via successor edges? + if (IsReachable(SU, TargetSU)) + return true; + for (SUnit::pred_iterator + I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I) + if (I->isAssignedRegDep() && + IsReachable(SU, I->getSUnit())) + return true; + return false; +} + +/// IsReachable - Checks if SU is reachable from TargetSU. +bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, + const SUnit *TargetSU) { + // If insertion of the edge SU->TargetSU would create a cycle + // then there is a path from TargetSU to SU. + int UpperBound, LowerBound; + LowerBound = Node2Index[TargetSU->NodeNum]; + UpperBound = Node2Index[SU->NodeNum]; + bool HasLoop = false; + // Is Ord(TargetSU) < Ord(SU) ? + if (LowerBound < UpperBound) { + Visited.reset(); + // There may be a path from TargetSU to SU. Check for it. + DFS(TargetSU, UpperBound, HasLoop); + } + return HasLoop; +} + +/// Allocate - assign the topological index to the node n. +void ScheduleDAGTopologicalSort::Allocate(int n, int index) { + Node2Index[n] = index; + Index2Node[index] = n; +} + +ScheduleDAGTopologicalSort:: +ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu) + : SUnits(sunits), ExitSU(exitsu) {} + +ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {} diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp new file mode 100644 index 0000000..71e7a21 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -0,0 +1,1322 @@ +//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAGInstrs class, which implements re-scheduling +// of MachineInstrs. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable use of AA during MI DAG construction"));
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt,
+ bool IsPostRAFlag,
+ LiveIntervals *lis)
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+ IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
+ assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
+ DbgValues.clear();
+ assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
+ "Virtual registers must be removed prior to PostRA scheduling");
+
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+ do {
+ if (const Operator *U = dyn_cast<Operator>(V)) {
+ // If we find a ptrtoint, we can transfer control back to the
+ // regular getUnderlyingObjectFromInt.
+ if (U->getOpcode() == Instruction::PtrToInt)
+ return U->getOperand(0);
+ // If we find an add of a constant, a multiplied value, or a phi, it's
+ // likely that the other operand will lead us to the base
+ // object. We don't have to worry about the case where the
+ // object address is somehow being computed by the multiply,
+ // because our callers only care when the result is an
+ // identifiable object.
+ if (U->getOpcode() != Instruction::Add ||
+ (!isa<ConstantInt>(U->getOperand(1)) &&
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
+ !isa<PHINode>(U->getOperand(1))))
+ return V;
+ V = U->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
+ } while (1);
+}
+
+/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static void getUnderlyingObjects(const Value *V,
+ SmallVectorImpl<Value *> &Objects) {
+ SmallPtrSet<const Value*, 16> Visited;
+ SmallVector<const Value *, 4> Working(1, V);
+ do {
+ V = Working.pop_back_val();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ V = *I;
+ if (!Visited.insert(V))
+ continue;
+ if (Operator::getOpcode(V) == Instruction::IntToPtr) {
+ const Value *O =
+ getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ if (O->getType()->isPointerTy()) {
+ Working.push_back(O);
+ continue;
+ }
+ }
+ Objects.push_back(const_cast<Value *>(V));
+ }
+ } while (!Working.empty());
+}
+
+/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, record the Value for that object.
+static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
+ const MachineFrameInfo *MFI,
+ SmallVectorImpl<std::pair<const Value *, bool> > &Objects) {
+ if (!MI->hasOneMemOperand() ||
+ !(*MI->memoperands_begin())->getValue() ||
+ (*MI->memoperands_begin())->isVolatile())
+ return;
+
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return;
+
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs);
+
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ bool MayAlias = true;
+ V = *I;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+
+ if (PSV->isAliased(MFI)) {
+ Objects.clear();
+ return;
+ }
+
+ MayAlias = PSV->mayAlias(MFI);
+ } else if (!isIdentifiedObject(V)) {
+ Objects.clear();
+ return;
+ }
+
+ Objects.push_back(std::make_pair(V, MayAlias));
+ }
+}
+
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
+ BB = bb;
+}
+
+void ScheduleDAGInstrs::finishBlock() {
+ // Subclasses should no longer refer to the old block.
+ BB = 0;
+}
+
+/// Initialize the DAG and common scheduler state for the current scheduling
+/// region. This does not actually create the DAG, only clears it. The
+/// scheduling driver may call BuildSchedGraph multiple times per scheduling
+/// region.
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ assert(bb == BB && "startBlock should set BB");
+ RegionBegin = begin;
+ RegionEnd = end;
+ EndIndex = endcount;
+ MISUnitMap.clear();
+
+ ScheduleDAG::clearDAG();
+}
+
+/// Close the current scheduling region. Don't clear any state in case the
+/// driver wants to refer to the previous scheduling region.
+void ScheduleDAGInstrs::exitRegion() {
+ // Nothing to do.
+}
+
+/// addSchedBarrierDeps - Add dependencies from instructions in the current
+/// list of instructions being scheduled to the scheduling barrier by adding
+/// the exit SU to the register defs and use list. This is because we want to
+/// make sure instructions which define registers that are either used by
+/// the terminator or are live-out are properly scheduled. This is
+/// especially important when the definition latency of the return value(s)
+/// is too high to be hidden by the branch or when the liveout registers
+/// are used by instructions in the fallthrough block.
+void ScheduleDAGInstrs::addSchedBarrierDeps() { + MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0; + ExitSU.setInstr(ExitMI); + bool AllDepKnown = ExitMI && + (ExitMI->isCall() || ExitMI->isBarrier()); + if (ExitMI && AllDepKnown) { + // If it's a call or a barrier, add dependencies on the defs and uses of + // instruction. + for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = ExitMI->getOperand(i); + if (!MO.isReg() || MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (TRI->isPhysicalRegister(Reg)) + Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); + else { + assert(!IsPostRA && "Virtual register encountered after regalloc."); + if (MO.readsReg()) // ignore undef operands + addVRegUseDeps(&ExitSU, i); + } + } + } else { + // For others, e.g. fallthrough, conditional branch, assume the exit + // uses all the registers that are livein to the successor blocks. + assert(Uses.empty() && "Uses in set before adding deps?"); + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + if (!Uses.contains(Reg)) + Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); + } + } +} + +/// MO is an operand of SU's instruction that defines a physical register. Add +/// data dependencies from SU to any uses of the physical register. +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { + const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); + assert(MO.isDef() && "expect physreg def"); + + // Ask the target if address-backscheduling is desirable, and if so how much. + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + + for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); + Alias.isValid(); ++Alias) { + if (!Uses.contains(*Alias)) + continue; + for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) { + SUnit *UseSU = I->SU; + if (UseSU == SU) + continue; + + // Adjust the dependence latency using operand def/use information, + // then allow the target to perform its own adjustments. + int UseOp = I->OpIdx; + MachineInstr *RegUse = 0; + SDep Dep; + if (UseOp < 0) + Dep = SDep(SU, SDep::Artificial); + else { + Dep = SDep(SU, SDep::Data, *Alias); + RegUse = UseSU->getInstr(); + Dep.setMinLatency( + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, + RegUse, UseOp, /*FindMin=*/true)); + } + Dep.setLatency( + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, + RegUse, UseOp, /*FindMin=*/false)); + + ST.adjustSchedDependency(SU, UseSU, Dep); + UseSU->addPred(Dep); + } + } +} + +/// addPhysRegDeps - Add register dependencies (data, anti, and output) from +/// this SUnit to following instructions in the same scheduling region that +/// depend the physical register referenced at OperIdx. +void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { + const MachineInstr *MI = SU->getInstr(); + const MachineOperand &MO = MI->getOperand(OperIdx); + + // Optionally add output and anti dependencies. For anti + // dependencies we use a latency of 0 because for a multi-issue + // target we want to allow the defining instruction to issue + // in the same cycle as the using instruction. + // TODO: Using a latency of 1 here for output dependencies assumes + // there's no cost for reusing registers. + SDep::Kind Kind = MO.isUse() ? 
SDep::Anti : SDep::Output; + for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); + Alias.isValid(); ++Alias) { + if (!Defs.contains(*Alias)) + continue; + for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { + SUnit *DefSU = I->SU; + if (DefSU == &ExitSU) + continue; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(*Alias))) { + if (Kind == SDep::Anti) + DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); + else { + SDep Dep(SU, Kind, /*Reg=*/*Alias); + unsigned OutLatency = + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); + Dep.setMinLatency(OutLatency); + Dep.setLatency(OutLatency); + DefSU->addPred(Dep); + } + } + } + } + + if (!MO.isDef()) { + // Either insert a new Reg2SUnits entry with an empty SUnits list, or + // retrieve the existing SUnits list for this register's uses. + // Push this SUnit on the use list. + Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); + } + else { + addPhysRegDataDeps(SU, OperIdx); + unsigned Reg = MO.getReg(); + + // clear this register's use list + if (Uses.contains(Reg)) + Uses.eraseAll(Reg); + + if (!MO.isDead()) { + Defs.eraseAll(Reg); + } else if (SU->isCall) { + // Calls will not be reordered because of chain dependencies (see + // below). Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); + Reg2SUnitsMap::iterator B = P.first; + Reg2SUnitsMap::iterator I = P.second; + for (bool isBegin = I == B; !isBegin; /* empty */) { + isBegin = (--I) == B; + if (!I->SU->isCall) + break; + I = Defs.erase(I); + } + } + + // Defs are pushed in the order they are visited and never reordered. + Defs.insert(PhysRegSUOper(SU, OperIdx, Reg)); + } +} + +/// addVRegDefDeps - Add register output and data dependencies from this SUnit +/// to instructions that occur later in the same scheduling region if they read +/// from or write to the virtual register defined at OperIdx. +/// +/// TODO: Hoist loop induction variable increments. This has to be +/// reevaluated. Generally, IV scheduling should be done before coalescing. +void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { + const MachineInstr *MI = SU->getInstr(); + unsigned Reg = MI->getOperand(OperIdx).getReg(); + + // Singly defined vregs do not have output/anti dependencies. + // The current operand is a def, so we have at least one. + // Check here if there are any others... + if (MRI.hasOneDef(Reg)) + return; + + // Add output dependence to the next nearest def of this vreg. + // + // Unless this definition is dead, the output dependence should be + // transitively redundant with antidependencies from this definition's + // uses. We're conservative for now until we have a way to guarantee the uses + // are not eliminated sometime during scheduling. The output dependence edge + // is also useful if output latency exceeds def-use latency. 
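+  // For illustration (hypothetical vregs and opcodes, bottom-up walk): given
+  //   %vreg1<def> = ADD32ri ...   ; earlier def, being visited now
+  //   %vreg1<def> = MOV32rm ...   ; later def, already visited
+  // the later def gains an SDep::Output predecessor edge on the earlier one
+  // below, so the two writes of %vreg1 keep their order.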
+ VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); + if (DefI == VRegDefs.end()) + VRegDefs.insert(VReg2SUnit(Reg, SU)); + else { + SUnit *DefSU = DefI->SU; + if (DefSU != SU && DefSU != &ExitSU) { + SDep Dep(SU, SDep::Output, Reg); + unsigned OutLatency = + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); + Dep.setMinLatency(OutLatency); + Dep.setLatency(OutLatency); + DefSU->addPred(Dep); + } + DefI->SU = SU; + } +} + +/// addVRegUseDeps - Add a register data dependency if the instruction that +/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a +/// register antidependency from this SUnit to instructions that occur later in +/// the same scheduling region if they write the virtual register. +/// +/// TODO: Handle ExitSU "uses" properly. +void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { + MachineInstr *MI = SU->getInstr(); + unsigned Reg = MI->getOperand(OperIdx).getReg(); + + // Lookup this operand's reaching definition. + assert(LIS && "vreg dependencies requires LiveIntervals"); + LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI)); + VNInfo *VNI = LRQ.valueIn(); + + // VNI will be valid because MachineOperand::readsReg() is checked by caller. + assert(VNI && "No value to read by operand"); + MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); + // Phis and other noninstructions (after coalescing) have a NULL Def. + if (Def) { + SUnit *DefSU = getSUnit(Def); + if (DefSU) { + // The reaching Def lives within this scheduling region. + // Create a data dependence. + SDep dep(DefSU, SDep::Data, Reg); + // Adjust the dependence latency using operand def/use information, then + // allow the target to perform its own adjustments. + int DefOp = Def->findRegisterDefOperandIdx(Reg); + dep.setLatency( + SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false)); + dep.setMinLatency( + SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true)); + + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); + SU->addPred(dep); + } + } + + // Add antidependence to the following def of the vreg it uses. + VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); + if (DefI != VRegDefs.end() && DefI->SU != SU) + DefI->SU->addPred(SDep(SU, SDep::Anti, Reg)); +} + +/// Return true if MI is an instruction we are unable to reason about +/// (like a call or something with unmodeled side effects). +static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { + if (MI->isCall() || MI->hasUnmodeledSideEffects() || + (MI->hasOrderedMemoryRef() && + (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) + return true; + return false; +} + +// This MI might have either incomplete info, or known to be unsafe +// to deal with (i.e. volatile object). +static inline bool isUnsafeMemoryObject(MachineInstr *MI, + const MachineFrameInfo *MFI) { + if (!MI || MI->memoperands_empty()) + return true; + // We purposefully do no check for hasOneMemOperand() here + // in hope to trigger an assert downstream in order to + // finish implementation. 
+  if ((*MI->memoperands_begin())->isVolatile() ||
+       MI->hasUnmodeledSideEffects())
+    return true;
+  const Value *V = (*MI->memoperands_begin())->getValue();
+  if (!V)
+    return true;
+
+  SmallVector<Value *, 4> Objs;
+  getUnderlyingObjects(V, Objs);
+  for (SmallVector<Value *, 4>::iterator I = Objs.begin(),
+       IE = Objs.end(); I != IE; ++I) {
+    V = *I;
+
+    if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+      // Similarly to getUnderlyingObjectsForInstr:
+      // For now, ignore PseudoSourceValues which may alias LLVM IR values
+      // because the code that uses this function has no way to cope with
+      // such aliases.
+      if (PSV->isAliased(MFI))
+        return true;
+    }
+
+    // Does this pointer refer to a distinct and identifiable object?
+    if (!isIdentifiedObject(V))
+      return true;
+  }
+
+  return false;
+}
+
+/// This returns true if the two MIs need a chain edge between them.
+/// If these are not even memory operations, we still may need
+/// chain deps between them. The question really is: could
+/// these two MIs be reordered during scheduling from a memory dependency
+/// point of view?
+static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+                             MachineInstr *MIa,
+                             MachineInstr *MIb) {
+  // Cover a trivial case - no edge is needed from a node to itself.
+  if (MIa == MIb)
+    return false;
+
+  if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
+    return true;
+
+  // If we are dealing with two "normal" loads, we do not need an edge
+  // between them - they could be reordered.
+  if (!MIa->mayStore() && !MIb->mayStore())
+    return false;
+
+  // To this point analysis is generic. From here on we do need AA.
+  if (!AA)
+    return true;
+
+  MachineMemOperand *MMOa = *MIa->memoperands_begin();
+  MachineMemOperand *MMOb = *MIb->memoperands_begin();
+
+  // FIXME: Need to handle multiple memory operands to support all targets.
+  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+    llvm_unreachable("Multiple memory operands.");
+
+  // The following interface to AA is fashioned after DAGCombiner::isAlias
+  // and operates with MachineMemOperand offset with some important
+  // assumptions:
+  //   - LLVM fundamentally assumes flat address spaces.
+  //   - MachineOperand offset can *only* result from legalization and
+  //     cannot affect queries other than the trivial case of overlap
+  //     checking.
+  //   - These offsets never wrap and never step outside
+  //     of allocated objects.
+  //   - There should never be any negative offsets here.
+  //
+  // FIXME: Modify API to hide this math from "user"
+  // FIXME: Even before we go to AA we can reason locally about some
+  // memory objects. It can save compile time, and possibly catch some
+  // corner cases not currently covered.
+
+  assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
+  assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+
+  int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
+  int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
+  int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
+
+  AliasAnalysis::AliasResult AAResult = AA->alias(
+    AliasAnalysis::Location(MMOa->getValue(), Overlapa,
+                            MMOa->getTBAAInfo()),
+    AliasAnalysis::Location(MMOb->getValue(), Overlapb,
+                            MMOb->getTBAAInfo()));
+
+  return (AAResult != AliasAnalysis::NoAlias);
+}
+
+/// This recursive function iterates over chain deps of SUb looking for
+/// the "latest" node that needs a chain edge to SUa.
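+// Worked example for the MIsNeedChainEdge offset math above, with
+// hypothetical sizes: a 4-byte access at offset 8 and an 8-byte access at
+// offset 0 give MinOffset = 0, Overlapa = 4 + 8 - 0 = 12 and
+// Overlapb = 8 + 0 - 0 = 8, so each AA Location is widened to span both
+// accesses as measured from the common minimum offset.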
+static unsigned
+iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+                 SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
+                 SmallPtrSet<const SUnit*, 16> &Visited) {
+  if (!SUa || !SUb || SUb == ExitSU)
+    return *Depth;
+
+  // Remember visited nodes.
+  if (!Visited.insert(SUb))
+    return *Depth;
+  // If there is _some_ dependency already in place, do not
+  // descend any further.
+  // TODO: Need to make sure that if that dependency got eliminated or ignored
+  // for any reason in the future, we would not violate DAG topology.
+  // Currently it does not happen, but makes an implicit assumption about
+  // future implementation.
+  //
+  // Independently, if we encounter a node that is some sort of global
+  // object (like a call) we already have a full set of dependencies to it
+  // and we can stop descending.
+  if (SUa->isSucc(SUb) ||
+      isGlobalMemoryObject(AA, SUb->getInstr()))
+    return *Depth;
+
+  // If we do need an edge, or we have exceeded the depth budget,
+  // add that edge to the predecessors chain of SUb,
+  // and stop descending.
+  if (*Depth > 200 ||
+      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+    SUb->addPred(SDep(SUa, SDep::MayAliasMem));
+    return *Depth;
+  }
+  // Track current depth.
+  (*Depth)++;
+  // Iterate over chain dependencies only.
+  for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
+       I != E; ++I)
+    if (I->isCtrl())
+      iterateChainSucc(AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+  return *Depth;
+}
+
+/// This function assumes that "downward" from SU there exists a
+/// tail/leaf of the already constructed DAG. It iterates downward and
+/// checks whether SU can alias any node dominated by it.
+static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+                            SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
+                            unsigned LatencyToLoad) {
+  if (!SU)
+    return;
+
+  SmallPtrSet<const SUnit*, 16> Visited;
+  unsigned Depth = 0;
+
+  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
+       I != IE; ++I) {
+    if (SU == *I)
+      continue;
+    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
+      SDep Dep(SU, SDep::MayAliasMem);
+      Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
+      (*I)->addPred(Dep);
+    }
+    // Now go through all the chain successors and iterate from them.
+    // Keep track of visited nodes.
+    for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
+         JE = (*I)->Succs.end(); J != JE; ++J)
+      if (J->isCtrl())
+        iterateChainSucc(AA, MFI, SU, J->getSUnit(),
+                         ExitSU, &Depth, Visited);
+  }
+}
+
+/// Check whether two objects need a chain edge; if so, add it,
+/// otherwise remember the rejected SU.
+static inline
+void addChainDependency(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+                        SUnit *SUa, SUnit *SUb,
+                        std::set<SUnit *> &RejectList,
+                        unsigned TrueMemOrderLatency = 0,
+                        bool isNormalMemory = false) {
+  // If this is a false dependency,
+  // do not add the edge, but remember the rejected node.
+  if (!EnableAASchedMI ||
+      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+    SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
+    Dep.setLatency(TrueMemOrderLatency);
+    SUb->addPred(Dep);
+  }
+  else {
+    // Duplicate entries should be ignored.
+    RejectList.insert(SUb);
+    DEBUG(dbgs() << "\tReject chain dep between SU("
+          << SUa->NodeNum << ") and SU("
+          << SUb->NodeNum << ")\n");
+  }
+}
+
+/// Create an SUnit for each real instruction, numbered in top-down topological
order. The instruction order A < B implies that no edge exists from B to A.
+///
+/// Map each real instruction to its SUnit.
+///
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
+/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
+/// instead of pointers.
+///
+/// MachineScheduler relies on initSUnits numbering the nodes by their order in
+/// the original instruction list.
+void ScheduleDAGInstrs::initSUnits() {
+  // We'll be allocating one SUnit for each real instruction in the region,
+  // which is contained within a basic block.
+  SUnits.reserve(BB->size());
+
+  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
+    MachineInstr *MI = I;
+    if (MI->isDebugValue())
+      continue;
+
+    SUnit *SU = newSUnit(MI);
+    MISUnitMap[MI] = SU;
+
+    SU->isCall = MI->isCall();
+    SU->isCommutable = MI->isCommutable();
+
+    // Assign the Latency field of SU using target-provided information.
+    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
+  }
+}
+
+/// If RegPressure is non-null, compute register pressure as a side effect. The
+/// DAG builder is an efficient place to do it because it already visits
+/// operands.
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
+                                        RegPressureTracker *RPTracker) {
+  // Create an SUnit for each real instruction.
+  initSUnits();
+
+  // We build scheduling units by walking a block's instruction list from bottom
+  // to top.
+
+  // Remember where a generic side-effecting instruction is as we proceed.
+  SUnit *BarrierChain = 0, *AliasChain = 0;
+
+  // Memory references to specific known memory locations are tracked
+  // so that they can be given more precise dependencies. We track
+  // separately the known memory locations that may alias and those
+  // that are known not to alias.
+  MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+  MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+  std::set<SUnit*> RejectMemNodes;
+
+  // Remove any stale debug info; sometimes BuildSchedGraph is called again
+  // without emitting the info from the previous call.
+  DbgValues.clear();
+  FirstDbgValue = NULL;
+
+  assert(Defs.empty() && Uses.empty() &&
+         "Only BuildGraph should update Defs/Uses");
+  Defs.setUniverse(TRI->getNumRegs());
+  Uses.setUniverse(TRI->getNumRegs());
+
+  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
+  // FIXME: Allow SparseSet to reserve space for the creation of virtual
+  // registers during scheduling. Don't artificially inflate the Universe
+  // because we want to assert that vregs are not created during DAG building.
+  VRegDefs.setUniverse(MRI.getNumVirtRegs());
+
+  // Model data dependencies between instructions being scheduled and the
+  // ExitSU.
+  addSchedBarrierDeps();
+
+  // Walk the list of instructions, from bottom moving up.
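+  // (Sketch of the walk that follows: each iteration peels one MI off the
+  // bottom of the region, finds its SUnit, adds register deps operand by
+  // operand, and then files it for memory purposes as a global barrier, a
+  // store, or a load.)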
+ MachineInstr *DbgMI = NULL; + for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; + MII != MIE; --MII) { + MachineInstr *MI = prior(MII); + if (MI && DbgMI) { + DbgValues.push_back(std::make_pair(DbgMI, MI)); + DbgMI = NULL; + } + + if (MI->isDebugValue()) { + DbgMI = MI; + continue; + } + if (RPTracker) { + RPTracker->recede(); + assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); + } + + assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && + "Cannot schedule terminators or labels!"); + + SUnit *SU = MISUnitMap[MI]; + assert(SU && "No SUnit mapped to this MI"); + + // Add register-based dependencies (data, anti, and output). + bool HasVRegDef = false; + for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { + const MachineOperand &MO = MI->getOperand(j); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (TRI->isPhysicalRegister(Reg)) + addPhysRegDeps(SU, j); + else { + assert(!IsPostRA && "Virtual register encountered!"); + if (MO.isDef()) { + HasVRegDef = true; + addVRegDefDeps(SU, j); + } + else if (MO.readsReg()) // ignore undef operands + addVRegUseDeps(SU, j); + } + } + // If we haven't seen any uses in this scheduling region, create a + // dependence edge to ExitSU to model the live-out latency. This is required + // for vreg defs with no in-region use, and prefetches with no vreg def. + // + // FIXME: NumDataSuccs would be more precise than NumSuccs here. This + // check currently relies on being called before adding chain deps. + if (SU->NumSuccs == 0 && SU->Latency > 1 + && (HasVRegDef || MI->mayLoad())) { + SDep Dep(SU, SDep::Artificial); + Dep.setLatency(SU->Latency - 1); + ExitSU.addPred(Dep); + } + + // Add chain dependencies. + // Chain dependencies used to enforce memory order should have + // latency of 0 (except for true dependency of Store followed by + // aliased Load... we estimate that with a single cycle of latency + // assuming the hardware will bypass) + // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable + // after stack slots are lowered to actual addresses. + // TODO: Use an AliasAnalysis and do real alias-analysis queries, and + // produce more precise dependence information. + unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0; + if (isGlobalMemoryObject(AA, MI)) { + // Be conservative with these and add dependencies on all memory + // references, even those that are known to not alias. + for (MapVector<const Value *, SUnit *>::iterator I = + NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { + I->second->addPred(SDep(SU, SDep::Barrier)); + } + for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = + NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) { + SDep Dep(SU, SDep::Barrier); + Dep.setLatency(TrueMemOrderLatency); + I->second[i]->addPred(Dep); + } + } + // Add SU to the barrier chain. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Barrier)); + BarrierChain = SU; + // This is a barrier event that acts as a pivotal node in the DAG, + // so it is safe to clear list of exposed nodes. + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); + RejectMemNodes.clear(); + NonAliasMemDefs.clear(); + NonAliasMemUses.clear(); + + // fall-through + new_alias_chain: + // Chain all possibly aliasing memory references though SU. 
+ if (AliasChain) { + unsigned ChainLatency = 0; + if (AliasChain->getInstr()->mayLoad()) + ChainLatency = TrueMemOrderLatency; + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes, + ChainLatency); + } + AliasChain = SU; + for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) + addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + TrueMemOrderLatency); + for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), + E = AliasMemDefs.end(); I != E; ++I) + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); + for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = + AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes, + TrueMemOrderLatency); + } + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); + PendingLoads.clear(); + AliasMemDefs.clear(); + AliasMemUses.clear(); + } else if (MI->mayStore()) { + SmallVector<std::pair<const Value *, bool>, 4> Objs; + getUnderlyingObjectsForInstr(MI, MFI, Objs); + + if (Objs.empty()) { + // Treat all other stores conservatively. + goto new_alias_chain; + } + + bool MayAlias = false; + for (SmallVector<std::pair<const Value *, bool>, 4>::iterator + K = Objs.begin(), KE = Objs.end(); K != KE; ++K) { + const Value *V = K->first; + bool ThisMayAlias = K->second; + if (ThisMayAlias) + MayAlias = true; + + // A store to a specific PseudoSourceValue. Add precise dependencies. + // Record the def in MemDefs, first adding a dep if there is + // an existing def. + MapVector<const Value *, SUnit *>::iterator I = + ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + MapVector<const Value *, SUnit *>::iterator IE = + ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) { + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + I->second = SU; + } else { + if (ThisMayAlias) + AliasMemDefs[V] = SU; + else + NonAliasMemDefs[V] = SU; + } + // Handle the uses in MemUses, if there are any. + MapVector<const Value *, std::vector<SUnit *> >::iterator J = + ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); + MapVector<const Value *, std::vector<SUnit *> >::iterator JE = + ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); + if (J != JE) { + for (unsigned i = 0, e = J->second.size(); i != e; ++i) + addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes, + TrueMemOrderLatency, true); + J->second.clear(); + } + } + if (MayAlias) { + // Add dependencies from all the PendingLoads, i.e. loads + // with no underlying object. + for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) + addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + TrueMemOrderLatency); + // Add dependence on alias chain, if needed. + if (AliasChain) + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + // But we also should check dependent instructions for the + // SU in question. + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); + } + // Add dependence on barrier chain, if needed. + // There is no point to check aliasing on barrier event. Even if + // SU and barrier _could_ be reordered, they should not. In addition, + // we have lost all RejectMemNodes below barrier. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Barrier)); + + if (!ExitSU.isPred(SU)) + // Push store's up a bit to avoid them getting in between cmp + // and branches. 
+ ExitSU.addPred(SDep(SU, SDep::Artificial)); + } else if (MI->mayLoad()) { + bool MayAlias = true; + if (MI->isInvariantLoad(AA)) { + // Invariant load, no chain dependencies needed! + } else { + SmallVector<std::pair<const Value *, bool>, 4> Objs; + getUnderlyingObjectsForInstr(MI, MFI, Objs); + + if (Objs.empty()) { + // A load with no underlying object. Depend on all + // potentially aliasing stores. + for (MapVector<const Value *, SUnit *>::iterator I = + AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); + + PendingLoads.push_back(SU); + MayAlias = true; + } else { + MayAlias = false; + } + + for (SmallVector<std::pair<const Value *, bool>, 4>::iterator + J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { + const Value *V = J->first; + bool ThisMayAlias = J->second; + + if (ThisMayAlias) + MayAlias = true; + + // A load from a specific PseudoSourceValue. Add precise dependencies. + MapVector<const Value *, SUnit *>::iterator I = + ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + MapVector<const Value *, SUnit *>::iterator IE = + ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + if (ThisMayAlias) + AliasMemUses[V].push_back(SU); + else + NonAliasMemUses[V].push_back(SU); + } + if (MayAlias) + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); + // Add dependencies on alias and barrier chains, if needed. + if (MayAlias && AliasChain) + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Barrier)); + } + } + } + if (DbgMI) + FirstDbgValue = DbgMI; + + Defs.clear(); + Uses.clear(); + VRegDefs.clear(); + PendingLoads.clear(); +} + +void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + SU->getInstr()->dump(); +#endif +} + +std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream oss(s); + if (SU == &EntrySU) + oss << "<entry>"; + else if (SU == &ExitSU) + oss << "<exit>"; + else + SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true); + return oss.str(); +} + +/// Return the basic block label. It is not necessarilly unique because a block +/// contains multiple scheduling regions. But it is fine for visualization. +std::string ScheduleDAGInstrs::getDAGName() const { + return "dag." + BB->getFullName(); +} + +//===----------------------------------------------------------------------===// +// SchedDFSResult Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { +/// \brief Internal state used to compute SchedDFSResult. +class SchedDFSImpl { + SchedDFSResult &R; + + /// Join DAG nodes into equivalence classes by their subtree. + IntEqClasses SubtreeClasses; + /// List PredSU, SuccSU pairs that represent data edges between subtrees. + std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs; + + struct RootData { + unsigned NodeID; + unsigned ParentNodeID; // Parent node (member of the parent subtree). + unsigned SubInstrCount; // Instr count in this tree only, not children. 
+ + RootData(unsigned id): NodeID(id), + ParentNodeID(SchedDFSResult::InvalidSubtreeID), + SubInstrCount(0) {} + + unsigned getSparseSetIndex() const { return NodeID; } + }; + + SparseSet<RootData> RootSet; + +public: + SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) { + RootSet.setUniverse(R.DFSNodeData.size()); + } + + /// Return true if this node been visited by the DFS traversal. + /// + /// During visitPostorderNode the Node's SubtreeID is assigned to the Node + /// ID. Later, SubtreeID is updated but remains valid. + bool isVisited(const SUnit *SU) const { + return R.DFSNodeData[SU->NodeNum].SubtreeID + != SchedDFSResult::InvalidSubtreeID; + } + + /// Initialize this node's instruction count. We don't need to flag the node + /// visited until visitPostorder because the DAG cannot have cycles. + void visitPreorder(const SUnit *SU) { + R.DFSNodeData[SU->NodeNum].InstrCount = + SU->getInstr()->isTransient() ? 0 : 1; + } + + /// Called once for each node after all predecessors are visited. Revisit this + /// node's predecessors and potentially join them now that we know the ILP of + /// the other predecessors. + void visitPostorderNode(const SUnit *SU) { + // Mark this node as the root of a subtree. It may be joined with its + // successors later. + R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum; + RootData RData(SU->NodeNum); + RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1; + + // If any predecessors are still in their own subtree, they either cannot be + // joined or are large enough to remain separate. If this parent node's + // total instruction count is not greater than a child subtree by at least + // the subtree limit, then try to join it now since splitting subtrees is + // only useful if multiple high-pressure paths are possible. + unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; + for (SUnit::const_pred_iterator + PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { + if (PI->getKind() != SDep::Data) + continue; + unsigned PredNum = PI->getSUnit()->NodeNum; + if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) + joinPredSubtree(*PI, SU, /*CheckLimit=*/false); + + // Either link or merge the TreeData entry from the child to the parent. + if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { + // If the predecessor's parent is invalid, this is a tree edge and the + // current node is the parent. + if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID) + RootSet[PredNum].ParentNodeID = SU->NodeNum; + } + else if (RootSet.count(PredNum)) { + // The predecessor is not a root, but is still in the root set. This + // must be the new parent that it was just joined to. Note that + // RootSet[PredNum].ParentNodeID may either be invalid or may still be + // set to the original parent. + RData.SubInstrCount += RootSet[PredNum].SubInstrCount; + RootSet.erase(PredNum); + } + } + RootSet[SU->NodeNum] = RData; + } + + /// Called once for each tree edge after calling visitPostOrderNode on the + /// predecessor. Increment the parent node's instruction count and + /// preemptively join this subtree to its parent's if it is small enough. + void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { + R.DFSNodeData[Succ->NodeNum].InstrCount + += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount; + joinPredSubtree(PredDep, Succ); + } + + /// Add a connection for cross edges. 
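+  /// (For example: a load feeding two otherwise independent subtrees is
+  /// reached a second time by the reverse DFS along an already-visited edge;
+  /// that second encounter is the cross edge recorded here and resolved into
+  /// an inter-tree connection in finalize().)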
+ void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) { + ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); + } + + /// Set each node's subtree ID to the representative ID and record connections + /// between trees. + void finalize() { + SubtreeClasses.compress(); + R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); + assert(SubtreeClasses.getNumClasses() == RootSet.size() + && "number of roots should match trees"); + for (SparseSet<RootData>::const_iterator + RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { + unsigned TreeID = SubtreeClasses[RI->NodeID]; + if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) + R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; + R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; + // Note that SubInstrCount may be greater than InstrCount if we joined + // subtrees across a cross edge. InstrCount will be attributed to the + // original parent, while SubInstrCount will be attributed to the joined + // parent. + } + R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); + R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); + DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); + for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) { + R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; + DEBUG(dbgs() << " SU(" << Idx << ") in tree " + << R.DFSNodeData[Idx].SubtreeID << '\n'); + } + for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator + I = ConnectionPairs.begin(), E = ConnectionPairs.end(); + I != E; ++I) { + unsigned PredTree = SubtreeClasses[I->first->NodeNum]; + unsigned SuccTree = SubtreeClasses[I->second->NodeNum]; + if (PredTree == SuccTree) + continue; + unsigned Depth = I->first->getDepth(); + addConnection(PredTree, SuccTree, Depth); + addConnection(SuccTree, PredTree, Depth); + } + } + +protected: + /// Join the predecessor subtree with the successor that is its DFS + /// parent. Apply some heuristics before joining. + bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, + bool CheckLimit = true) { + assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges"); + + // Check if the predecessor is already joined. + const SUnit *PredSU = PredDep.getSUnit(); + unsigned PredNum = PredSU->NodeNum; + if (R.DFSNodeData[PredNum].SubtreeID != PredNum) + return false; + + // Four is the magic number of successors before a node is considered a + // pinch point. + unsigned NumDataSucs = 0; + for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), + SE = PredSU->Succs.end(); SI != SE; ++SI) { + if (SI->getKind() == SDep::Data) { + if (++NumDataSucs >= 4) + return false; + } + } + if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit) + return false; + R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum; + SubtreeClasses.join(Succ->NodeNum, PredNum); + return true; + } + + /// Called by finalize() to record a connection between trees. 
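+  /// (For example: connections (FromTree = 3, ToTree = 5) recorded at depths
+  /// 2 and then 7 collapse to a single Connection with Level = 7, and the
+  /// entry is propagated up through tree 3's ancestors.)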
+ void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) { + if (!Depth) + return; + + do { + SmallVectorImpl<SchedDFSResult::Connection> &Connections = + R.SubtreeConnections[FromTree]; + for (SmallVectorImpl<SchedDFSResult::Connection>::iterator + I = Connections.begin(), E = Connections.end(); I != E; ++I) { + if (I->TreeID == ToTree) { + I->Level = std::max(I->Level, Depth); + return; + } + } + Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); + FromTree = R.DFSTreeData[FromTree].ParentTreeID; + } while (FromTree != SchedDFSResult::InvalidSubtreeID); + } +}; +} // namespace llvm + +namespace { +/// \brief Manage the stack used by a reverse depth-first search over the DAG. +class SchedDAGReverseDFS { + std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack; +public: + bool isComplete() const { return DFSStack.empty(); } + + void follow(const SUnit *SU) { + DFSStack.push_back(std::make_pair(SU, SU->Preds.begin())); + } + void advance() { ++DFSStack.back().second; } + + const SDep *backtrack() { + DFSStack.pop_back(); + return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second); + } + + const SUnit *getCurr() const { return DFSStack.back().first; } + + SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; } + + SUnit::const_pred_iterator getPredEnd() const { + return getCurr()->Preds.end(); + } +}; +} // anonymous + +static bool hasDataSucc(const SUnit *SU) { + for (SUnit::const_succ_iterator + SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { + if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode()) + return true; + } + return false; +} + +/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first +/// search from this root. +void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { + if (!IsBottomUp) + llvm_unreachable("Top-down ILP metric is unimplemnted"); + + SchedDFSImpl Impl(*this); + for (ArrayRef<SUnit>::const_iterator + SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) { + const SUnit *SU = &*SI; + if (Impl.isVisited(SU) || hasDataSucc(SU)) + continue; + + SchedDAGReverseDFS DFS; + Impl.visitPreorder(SU); + DFS.follow(SU); + for (;;) { + // Traverse the leftmost path as far as possible. + while (DFS.getPred() != DFS.getPredEnd()) { + const SDep &PredDep = *DFS.getPred(); + DFS.advance(); + // Ignore non-data edges. + if (PredDep.getKind() != SDep::Data + || PredDep.getSUnit()->isBoundaryNode()) { + continue; + } + // An already visited edge is a cross edge, assuming an acyclic DAG. + if (Impl.isVisited(PredDep.getSUnit())) { + Impl.visitCrossEdge(PredDep, DFS.getCurr()); + continue; + } + Impl.visitPreorder(PredDep.getSUnit()); + DFS.follow(PredDep.getSUnit()); + } + // Visit the top of the stack in postorder and backtrack. + const SUnit *Child = DFS.getCurr(); + const SDep *PredDep = DFS.backtrack(); + Impl.visitPostorderNode(Child); + if (PredDep) + Impl.visitPostorderEdge(*PredDep, DFS.getCurr()); + if (DFS.isComplete()) + break; + } + } + Impl.finalize(); +} + +/// The root of the given SubtreeID was just scheduled. For all subtrees +/// connected to this tree, record the depth of the connection so that the +/// nearest connected subtrees can be prioritized. 
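+/// (For example: when the root of tree 2 is scheduled, every tree recorded in
+/// SubtreeConnections[2] has its SubtreeConnectLevels entry raised to the
+/// stored connection depth, letting the scheduler prefer nearby subtrees
+/// next.)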
+void SchedDFSResult::scheduleTree(unsigned SubtreeID) { + for (SmallVectorImpl<Connection>::const_iterator + I = SubtreeConnections[SubtreeID].begin(), + E = SubtreeConnections[SubtreeID].end(); I != E; ++I) { + SubtreeConnectLevels[I->TreeID] = + std::max(SubtreeConnectLevels[I->TreeID], I->Level); + DEBUG(dbgs() << " Tree: " << I->TreeID + << " @" << SubtreeConnectLevels[I->TreeID] << '\n'); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ILPValue::print(raw_ostream &OS) const { + OS << InstrCount << " / " << Length << " = "; + if (!Length) + OS << "BADILP"; + else + OS << format("%g", ((double)InstrCount / Length)); +} + +void ILPValue::dump() const { + dbgs() << *this << '\n'; +} + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { + Val.print(OS); + return OS; +} + +} // namespace llvm +#endif // !NDEBUG || LLVM_ENABLE_DUMP diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp new file mode 100644 index 0000000..8ddb3e8 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -0,0 +1,100 @@ +//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG::viewGraph method. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <fstream> +using namespace llvm; + +namespace llvm { + template<> + struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const ScheduleDAG *G) { + return G->MF.getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool isNodeHidden(const SUnit *Node) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } + + static bool hasNodeAddressLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + return true; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. 
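+    /// (The defaults below render artificial edges as
+    /// "color=cyan,style=dashed", control/chain edges as
+    /// "color=blue,style=dashed", and leave data edges solid.)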
+ static std::string getEdgeAttributes(const SUnit *Node, + SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + + + std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(ScheduleDAG *G, + GraphWriter<ScheduleDAG*> &GW) { + return G->addCustomGraphFeatures(GW); + } + }; +} + +std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, + const ScheduleDAG *G) { + return G->getGraphNodeLabel(SU); +} + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) { + // This code is only for debugging! +#ifndef NDEBUG + ViewGraph(this, Name, false, Title); +#else + errs() << "ScheduleDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// Out-of-line implementation with no arguments is handy for gdb. +void ScheduleDAG::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp new file mode 100644 index 0000000..2cd84d6 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -0,0 +1,248 @@ +//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScoreboardHazardRecognizer class, which +// encapsultes hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +#ifndef NDEBUG +const char *ScoreboardHazardRecognizer::DebugType = ""; +#endif + +ScoreboardHazardRecognizer:: +ScoreboardHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *SchedDAG, + const char *ParentDebugType) : + ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0), + IssueCount(0) { + +#ifndef NDEBUG + DebugType = ParentDebugType; +#endif + + // Determine the maximum depth of any itinerary. This determines the depth of + // the scoreboard. We always make the scoreboard at least 1 cycle deep to + // avoid dealing with the boundary condition. 
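+  // (Worked example with a hypothetical itinerary: stages spanning cycles
+  // 0-2 and 3-5 give ItinDepth = 6, which the doubling loop below rounds up
+  // to ScoreboardDepth = 8; a power-of-2 depth presumably lets the cyclic
+  // scoreboard wrap its index with a cheap mask instead of a divide.)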
+ unsigned ScoreboardDepth = 1; + if (ItinData && !ItinData->isEmpty()) { + for (unsigned idx = 0; ; ++idx) { + if (ItinData->isEndMarker(idx)) + break; + + const InstrStage *IS = ItinData->beginStage(idx); + const InstrStage *E = ItinData->endStage(idx); + unsigned CurCycle = 0; + unsigned ItinDepth = 0; + for (; IS != E; ++IS) { + unsigned StageDepth = CurCycle + IS->getCycles(); + if (ItinDepth < StageDepth) ItinDepth = StageDepth; + CurCycle += IS->getNextCycles(); + } + + // Find the next power-of-2 >= ItinDepth + while (ItinDepth > ScoreboardDepth) { + ScoreboardDepth *= 2; + // Don't set MaxLookAhead until we find at least one nonzero stage. + // This way, an itinerary with no stages has MaxLookAhead==0, which + // completely bypasses the scoreboard hazard logic. + MaxLookAhead = ScoreboardDepth; + } + } + } + + ReservedScoreboard.reset(ScoreboardDepth); + RequiredScoreboard.reset(ScoreboardDepth); + + // If MaxLookAhead is not set above, then we are not enabled. + if (!isEnabled()) + DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n"); + else { + // A nonempty itinerary must have a SchedModel. + IssueWidth = ItinData->SchedModel->IssueWidth; + DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " + << ScoreboardDepth << '\n'); + } +} + +void ScoreboardHazardRecognizer::Reset() { + IssueCount = 0; + RequiredScoreboard.reset(); + ReservedScoreboard.reset(); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScoreboardHazardRecognizer::Scoreboard::dump() const { + dbgs() << "Scoreboard:\n"; + + unsigned last = Depth - 1; + while ((last > 0) && ((*this)[last] == 0)) + last--; + + for (unsigned i = 0; i <= last; i++) { + unsigned FUs = (*this)[i]; + dbgs() << "\t"; + for (int j = 31; j >= 0; j--) + dbgs() << ((FUs & (1 << j)) ? '1' : '0'); + dbgs() << '\n'; + } +} +#endif + +bool ScoreboardHazardRecognizer::atIssueLimit() const { + if (IssueWidth == 0) + return false; + + return IssueCount == IssueWidth; +} + +ScheduleHazardRecognizer::HazardType +ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + if (!ItinData || ItinData->isEmpty()) + return NoHazard; + + // Note that stalls will be negative for bottom-up scheduling. + int cycle = Stalls; + + // Use the itinerary for the underlying instruction to check for + // free FU's in the scoreboard at the appropriate future cycles. + + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (MCID == NULL) { + // Don't check hazards for non-machineinstr Nodes. + return NoHazard; + } + unsigned idx = MCID->getSchedClass(); + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { + // We must find one of the stage's units free for every cycle the + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + int StageCycle = cycle + (int)i; + if (StageCycle < 0) + continue; + + if (StageCycle >= (int)RequiredScoreboard.getDepth()) { + assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() && + "Scoreboard depth exceeded!"); + // This stage was stalled beyond pipeline depth, so cannot conflict. + break; + } + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[StageCycle]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. 
+ freeUnits &= ~RequiredScoreboard[StageCycle]; + break; + } + + if (!freeUnits) { + DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", "); + DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); + DEBUG(DAG->dumpNode(SU)); + return Hazard; + } + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + return NoHazard; +} + +void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { + if (!ItinData || ItinData->isEmpty()) + return; + + // Use the itinerary for the underlying instruction to reserve FU's + // in the scoreboard at the appropriate future cycles. + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + assert(MCID && "The scheduler must filter non-machineinstrs"); + if (DAG->TII->isZeroCost(MCID->Opcode)) + return; + + ++IssueCount; + + unsigned cycle = 0; + + unsigned idx = MCID->getSchedClass(); + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { + // We must reserve one of the stage's units for every cycle the + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < RequiredScoreboard.getDepth()) && + "Scoreboard depth exceeded!"); + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + + // reduce to a single unit + unsigned freeUnit = 0; + do { + freeUnit = freeUnits; + freeUnits = freeUnit & (freeUnit - 1); + } while (freeUnits); + + if (IS->getReservationKind() == InstrStage::Required) + RequiredScoreboard[cycle + i] |= freeUnit; + else + ReservedScoreboard[cycle + i] |= freeUnit; + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + DEBUG(ReservedScoreboard.dump()); + DEBUG(RequiredScoreboard.dump()); +} + +void ScoreboardHazardRecognizer::AdvanceCycle() { + IssueCount = 0; + ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); + RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); +} + +void ScoreboardHazardRecognizer::RecedeCycle() { + IssueCount = 0; + ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0; + ReservedScoreboard.recede(); + RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0; + RequiredScoreboard.recede(); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp new file mode 100644 index 0000000..eb16095 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -0,0 +1,10214 @@ +//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run +// both before and after the DAG is legalized. +// +// This pass is not a substitute for the LLVM IR instcombine pass. This pass is +// primarily intended to handle simplification opportunities that are implicit +// in the LLVM IR and exposed by the various codegen lowering phases. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dagcombine" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NodesCombined , "Number of dag nodes combined"); +STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); +STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); +STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); +STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); + +namespace { + static cl::opt<bool> + CombinerAA("combiner-alias-analysis", cl::Hidden, + cl::desc("Turn on alias analysis during testing")); + + static cl::opt<bool> + CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, + cl::desc("Include global information in alias analysis")); + +//------------------------------ DAGCombiner ---------------------------------// + + class DAGCombiner { + SelectionDAG &DAG; + const TargetLowering &TLI; + CombineLevel Level; + CodeGenOpt::Level OptLevel; + bool LegalOperations; + bool LegalTypes; + + // Worklist of all of the nodes that need to be simplified. + // + // This has the semantics that when adding to the worklist, + // the item added must be next to be processed. It should + // also only appear once. The naive approach to this takes + // linear time. + // + // To reduce the insert/remove time to logarithmic, we use + // a set and a vector to maintain our worklist. + // + // The set contains the items on the worklist, but does not + // maintain the order they should be visited. + // + // The vector maintains the order nodes should be visited, but may + // contain duplicate or removed nodes. When choosing a node to + // visit, we pop off the order stack until we find an item that is + // also in the contents set. All operations are O(log N). + SmallPtrSet<SDNode*, 64> WorkListContents; + SmallVector<SDNode*, 64> WorkListOrder; + + // AA - Used for DAG load/store alias analysis. + AliasAnalysis &AA; + + /// AddUsersToWorkList - When an instruction is simplified, add all users of + /// the instruction to the work lists because they might get more simplified + /// now. + /// + void AddUsersToWorkList(SDNode *N) { + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) + AddToWorkList(*UI); + } + + /// visit - call the node-specific routine that knows how to fold each + /// particular type of node. + SDValue visit(SDNode *N); + + public: + /// AddToWorkList - Add to the work list making sure its instance is at the + /// back (next to be processed.) + void AddToWorkList(SDNode *N) { + WorkListContents.insert(N); + WorkListOrder.push_back(N); + } + + /// removeFromWorkList - remove all instances of N from the worklist. 
+ /// + void removeFromWorkList(SDNode *N) { + WorkListContents.erase(N); + } + + SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo = true); + + SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { + return CombineTo(N, &Res, 1, AddTo); + } + + SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, + bool AddTo = true) { + SDValue To[] = { Res0, Res1 }; + return CombineTo(N, To, 2, AddTo); + } + + void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); + + private: + + /// SimplifyDemandedBits - Check the specified integer node value to see if + /// it can be simplified or if things it uses can be simplified by bit + /// propagation. If so, return true. + bool SimplifyDemandedBits(SDValue Op) { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); + return SimplifyDemandedBits(Op, Demanded); + } + + bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); + + bool CombineToPreIndexedLoadStore(SDNode *N); + bool CombineToPostIndexedLoadStore(SDNode *N); + + void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); + SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); + SDValue SExtPromoteOperand(SDValue Op, EVT PVT); + SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); + SDValue PromoteIntBinOp(SDValue Op); + SDValue PromoteIntShiftOp(SDValue Op); + SDValue PromoteExtend(SDValue Op); + bool PromoteLoad(SDValue Op); + + void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType); + + /// combine - call the node-specific routine that knows how to fold each + /// particular type of node. If that doesn't do anything, try the + /// target-specific DAG combines. + SDValue combine(SDNode *N); + + // Visitation implementation - Implement dag node combining for different + // node types. The semantics are as follows: + // Return Value: + // SDValue.getNode() == 0 - No change was made + // SDValue.getNode() == N - N was replaced, is dead and has been handled. + // otherwise - N should be replaced by the returned Operand. 
+ // + SDValue visitTokenFactor(SDNode *N); + SDValue visitMERGE_VALUES(SDNode *N); + SDValue visitADD(SDNode *N); + SDValue visitSUB(SDNode *N); + SDValue visitADDC(SDNode *N); + SDValue visitSUBC(SDNode *N); + SDValue visitADDE(SDNode *N); + SDValue visitSUBE(SDNode *N); + SDValue visitMUL(SDNode *N); + SDValue visitSDIV(SDNode *N); + SDValue visitUDIV(SDNode *N); + SDValue visitSREM(SDNode *N); + SDValue visitUREM(SDNode *N); + SDValue visitMULHU(SDNode *N); + SDValue visitMULHS(SDNode *N); + SDValue visitSMUL_LOHI(SDNode *N); + SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitSMULO(SDNode *N); + SDValue visitUMULO(SDNode *N); + SDValue visitSDIVREM(SDNode *N); + SDValue visitUDIVREM(SDNode *N); + SDValue visitAND(SDNode *N); + SDValue visitOR(SDNode *N); + SDValue visitXOR(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N); + SDValue SimplifyVUnaryOp(SDNode *N); + SDValue visitSHL(SDNode *N); + SDValue visitSRA(SDNode *N); + SDValue visitSRL(SDNode *N); + SDValue visitCTLZ(SDNode *N); + SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); + SDValue visitCTTZ(SDNode *N); + SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); + SDValue visitCTPOP(SDNode *N); + SDValue visitSELECT(SDNode *N); + SDValue visitSELECT_CC(SDNode *N); + SDValue visitSETCC(SDNode *N); + SDValue visitSIGN_EXTEND(SDNode *N); + SDValue visitZERO_EXTEND(SDNode *N); + SDValue visitANY_EXTEND(SDNode *N); + SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitTRUNCATE(SDNode *N); + SDValue visitBITCAST(SDNode *N); + SDValue visitBUILD_PAIR(SDNode *N); + SDValue visitFADD(SDNode *N); + SDValue visitFSUB(SDNode *N); + SDValue visitFMUL(SDNode *N); + SDValue visitFMA(SDNode *N); + SDValue visitFDIV(SDNode *N); + SDValue visitFREM(SDNode *N); + SDValue visitFCOPYSIGN(SDNode *N); + SDValue visitSINT_TO_FP(SDNode *N); + SDValue visitUINT_TO_FP(SDNode *N); + SDValue visitFP_TO_SINT(SDNode *N); + SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitFP_ROUND(SDNode *N); + SDValue visitFP_ROUND_INREG(SDNode *N); + SDValue visitFP_EXTEND(SDNode *N); + SDValue visitFNEG(SDNode *N); + SDValue visitFABS(SDNode *N); + SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); + SDValue visitBRCOND(SDNode *N); + SDValue visitBR_CC(SDNode *N); + SDValue visitLOAD(SDNode *N); + SDValue visitSTORE(SDNode *N); + SDValue visitINSERT_VECTOR_ELT(SDNode *N); + SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); + SDValue visitBUILD_VECTOR(SDNode *N); + SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitEXTRACT_SUBVECTOR(SDNode *N); + SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitMEMBARRIER(SDNode *N); + + SDValue XformToShuffleWithZero(SDNode *N); + SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + + SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + + bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); + SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); + SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + bool NotExtCompare = false); + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + DebugLoc DL, bool foldBooleans = true); + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp); + SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); + SDValue BuildSDIV(SDNode *N); + SDValue BuildUDIV(SDNode *N); + SDValue MatchBSwapHWordLow(SDNode 
*N, SDValue N0, SDValue N1, + bool DemandHighBits = true); + SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDValue ReduceLoadWidth(SDNode *N); + SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue TransformFPLoadStorePair(SDNode *N); + SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); + SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); + + SDValue GetDemandedBits(SDValue V, const APInt &Mask); + + /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// looking for aliasing nodes and adding them to the Aliases vector. + void GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector<SDValue, 8> &Aliases); + + /// isAlias - Return true if there is any possibility that the two addresses + /// overlap. + bool isAlias(SDValue Ptr1, int64_t Size1, + const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, + const MDNode *TBAAInfo1, + SDValue Ptr2, int64_t Size2, + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2, + const MDNode *TBAAInfo2) const; + + /// isAlias - Return true if there is any possibility that the two addresses + /// overlap. + bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1); + + /// FindAliasInfo - Extracts the relevant alias information from the memory + /// node. Returns true if the operand was a load. + bool FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset, + unsigned &SrcValueAlignment, + const MDNode *&TBAAInfo) const; + + /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, + /// looking for a better chain (aliasing node.) + SDValue FindBetterChain(SDNode *N, SDValue Chain); + + /// Merge consecutive store operations into a wide store. + /// This optimization uses wide integers or vectors when possible. + /// \return True if some memory operations were changed. + bool MergeConsecutiveStores(StoreSDNode *N); + + public: + DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} + + /// Run - runs the dag combiner on all nodes in the work list + void Run(CombineLevel AtLevel); + + SelectionDAG &getDAG() const { return DAG; } + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + EVT getShiftAmountTy(EVT LHSTy) { + return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); + } + + /// isTypeLegal - This method returns true if we are running before type + /// legalization or if the specified VT is legal. + bool isTypeLegal(const EVT &VT) { + if (!LegalTypes) return true; + return TLI.isTypeLegal(VT); + } + }; +} + + +namespace { +/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// nodes from the worklist. 
+class WorkListRemover : public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; +public: + explicit WorkListRemover(DAGCombiner &dc) + : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + DC.removeFromWorkList(N); + } +}; +} + +//===----------------------------------------------------------------------===// +// TargetLowering::DAGCombinerInfo implementation +//===----------------------------------------------------------------------===// + +void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { + ((DAGCombiner*)DC)->AddToWorkList(N); +} + +void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { + ((DAGCombiner*)DC)->removeFromWorkList(N); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); +} + + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); +} + +void TargetLowering::DAGCombinerInfo:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); +} + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isNegatibleForFree - Return 1 if we can compute the negated form of the +/// specified expression for the same cost as the expression itself, or 2 if we +/// can compute the negated form more cheaply than the expression itself. +static char isNegatibleForFree(SDValue Op, bool LegalOperations, + const TargetLowering &TLI, + const TargetOptions *Options, + unsigned Depth = 0) { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return 2; + + // Don't allow anything with multiple uses. + if (!Op.hasOneUse()) return 0; + + // Don't recurse exponentially. + if (Depth > 6) return 0; + + switch (Op.getOpcode()) { + default: return false; + case ISD::ConstantFP: + // Don't invert constant FP values after legalize. The negated constant + // isn't necessarily legal. + return LegalOperations ? 0 : 1; + case ISD::FADD: + // FIXME: determine better conditions for this xform. + if (!Options->UnsafeFPMath) return 0; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) + return 0; + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. 
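+    // E.g. with A == B == +0.0, -(A - B) is -0.0 while B - A is +0.0, so
+    // the fold is only legal under unsafe-fp-math.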
+ if (!Options->UnsafeFPMath) return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + if (Options->HonorSignDependentRoundingFPMath()) return 0; + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) + return V; + + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, + Depth + 1); + } +} + +/// GetNegatedExpression - If isNegatibleForFree returns true, this function +/// returns the newly negated expression. +static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, unsigned Depth = 0) { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); + + // Don't allow anything with multiple uses. + assert(Op.hasOneUse() && "Unknown reuse!"); + + assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + switch (Op.getOpcode()) { + default: llvm_unreachable("Unknown code"); + case ISD::ConstantFP: { + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, Op.getValueType()); + } + case ISD::FADD: + // FIXME: determine better conditions for this xform. + assert(DAG.getTarget().Options.UnsafeFPMath); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options, Depth+1)) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1), + Op.getOperand(0)); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. 
+ assert(DAG.getTarget().Options.UnsafeFPMath); + + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) + if (N0CFP->getValueAPF().isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0)); + + case ISD::FMUL: + case ISD::FDIV: + assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options, Depth+1)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(0), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1)); + + case ISD::FP_EXTEND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + } +} + + +// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc +// that selects between the values 1 and 0, making it equivalent to a setcc. +// Also, set the incoming LHS, RHS, and CC references to the appropriate +// nodes based on the type of node we are checking. This simplifies life a +// bit for the callers. +static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) { + if (N.getOpcode() == ISD::SETCC) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(2); + return true; + } + if (N.getOpcode() == ISD::SELECT_CC && + N.getOperand(2).getOpcode() == ISD::Constant && + N.getOperand(3).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && + cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; + } + return false; +} + +// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only +// one use. If this is true, it allows the users to invert the operation for +// free when it is profitable to do so. +static bool isOneUseSetCC(SDValue N) { + SDValue N0, N1, N2; + if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) + return true; + return false; +} + +SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, + SDValue N0, SDValue N1) { + EVT VT = N0.getValueType(); + if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { + if (isa<ConstantSDNode>(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast<ConstantSDNode>(N0.getOperand(1)), + cast<ConstantSDNode>(N1)); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + } + if (N0.hasOneUse()) { + // reassoc. 
(op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N0.getOperand(0), N1); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + + if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { + if (isa<ConstantSDNode>(N0)) { + // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast<ConstantSDNode>(N1.getOperand(1)), + cast<ConstantSDNode>(N0)); + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + } + if (N1.hasOneUse()) { + // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N1.getOperand(0), N0); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo) { + assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.1 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + To[0].getNode()->dump(&DAG); + dbgs() << " and " << NumTo-1 << " other values\n"; + for (unsigned i = 0, e = NumTo; i != e; ++i) + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && + "Cannot combine value to value of different type!")); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesWith(N, To); + if (AddTo) { + // Push the new nodes and any users onto the worklist + for (unsigned i = 0, e = NumTo; i != e; ++i) { + if (To[i].getNode()) { + AddToWorkList(To[i].getNode()); + AddUsersToWorkList(To[i].getNode()); + } + } + } + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. + removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + return SDValue(N, 0); +} + +void DAGCombiner:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + // Replace all uses. If any nodes become isomorphic to other nodes and + // are deleted, make sure to remove them from our worklist. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); + + // Push the new node and any (possibly new) users onto the worklist. + AddToWorkList(TLO.New.getNode()); + AddUsersToWorkList(TLO.New.getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (TLO.Old.getNode()->use_empty()) { + removeFromWorkList(TLO.Old.getNode()); + + // If the operands of this node are only used by the node, they will now + // be dead. Make sure to visit them first to delete dead nodes early. + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) + if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) + AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + + DAG.DeleteNode(TLO.Old.getNode()); + } +} + +/// SimplifyDemandedBits - Check the specified integer node value to see if +/// it can be simplified or if things it uses can be simplified by bit +/// propagation. If so, return true. 
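+/// For example, if only the low 8 bits of (or x, 0xFF00) are demanded, the
+/// or contributes nothing to those bits and can be replaced by x outright.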
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); + APInt KnownZero, KnownOne; + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + return false; + + // Revisit the node. + AddToWorkList(Op.getNode()); + + // Replace the old value with the new one. + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.2 "; + TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; + TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); + + CommitTargetLoweringOpt(TLO); + return true; +} + +void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { + DebugLoc dl = Load->getDebugLoc(); + EVT VT = Load->getValueType(0); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + + DEBUG(dbgs() << "\nReplacing.9 "; + Load->dump(&DAG); + dbgs() << "\nWith: "; + Trunc.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); + removeFromWorkList(Load); + DAG.DeleteNode(Load); + AddToWorkList(Trunc.getNode()); +} + +SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { + Replace = false; + DebugLoc dl = Op.getDebugLoc(); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) + : LD->getExtensionType(); + Replace = true; + return DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: break; + case ISD::AssertSext: + return DAG.getNode(ISD::AssertSext, dl, PVT, + SExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::AssertZext: + return DAG.getNode(ISD::AssertZext, dl, PVT, + ZExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::Constant: { + unsigned ExtOpc = + Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOpc, dl, PVT, Op); + } + } + + if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) + return SDValue(); + return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); +} + +SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { + if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) + return SDValue(); + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + DAG.getValueType(OldVT)); +} + +SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getZeroExtendInReg(NewOp, dl, OldVT); +} + +/// PromoteIntBinOp - Promote the specified integer binary operation if the +/// target indicates it is beneficial. e.g. 
On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace0 = false; + SDValue N0 = Op.getOperand(0); + SDValue NN0 = PromoteOperand(N0, PVT, Replace0); + if (NN0.getNode() == 0) + return SDValue(); + + bool Replace1 = false; + SDValue N1 = Op.getOperand(1); + SDValue NN1; + if (N0 == N1) + NN1 = NN0; + else { + NN1 = PromoteOperand(N1, PVT, Replace1); + if (NN1.getNode() == 0) + return SDValue(); + } + + AddToWorkList(NN0.getNode()); + if (NN1.getNode()) + AddToWorkList(NN1.getNode()); + + if (Replace0) + ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); + if (Replace1) + ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, NN0, NN1)); + } + return SDValue(); +} + +/// PromoteIntShiftOp - Promote the specified integer shift operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace = false; + SDValue N0 = Op.getOperand(0); + if (Opc == ISD::SRA) + N0 = SExtPromoteOperand(Op.getOperand(0), PVT); + else if (Opc == ISD::SRL) + N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); + else + N0 = PromoteOperand(N0, PVT, Replace); + if (N0.getNode() == 0) + return SDValue(); + + AddToWorkList(N0.getNode()); + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + } + return SDValue(); +} + +SDValue DAGCombiner::PromoteExtend(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. 
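+  // For instance, an x86 target may report i16 as undesirable here: 16-bit
+  // instructions need an extra operand-size prefix, so doing the work in
+  // i32 and truncating afterwards is usually smaller and no slower.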
+  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+    assert(PVT != VT && "Don't know what type to promote to!");
+    // fold (aext (aext x)) -> (aext x)
+    // fold (aext (zext x)) -> (zext x)
+    // fold (aext (sext x)) -> (sext x)
+    DEBUG(dbgs() << "\nPromoting ";
+          Op.getNode()->dump(&DAG));
+    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
+  }
+  return SDValue();
+}
+
+bool DAGCombiner::PromoteLoad(SDValue Op) {
+  if (!LegalOperations)
+    return false;
+
+  EVT VT = Op.getValueType();
+  if (VT.isVector() || !VT.isInteger())
+    return false;
+
+  // If operation type is 'undesirable', e.g. i16 on x86, consider
+  // promoting it.
+  unsigned Opc = Op.getOpcode();
+  if (TLI.isTypeDesirableForOp(Opc, VT))
+    return false;
+
+  EVT PVT = VT;
+  // Consult target whether it is a good idea to promote this operation and
+  // what's the right type to promote it to.
+  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+    assert(PVT != VT && "Don't know what type to promote to!");
+
+    DebugLoc dl = Op.getDebugLoc();
+    SDNode *N = Op.getNode();
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    EVT MemVT = LD->getMemoryVT();
+    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+                                                  : ISD::EXTLOAD)
+      : LD->getExtensionType();
+    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+                                   LD->getChain(), LD->getBasePtr(),
+                                   LD->getPointerInfo(),
+                                   MemVT, LD->isVolatile(),
+                                   LD->isNonTemporal(), LD->getAlignment());
+    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+
+    DEBUG(dbgs() << "\nPromoting ";
+          N->dump(&DAG);
+          dbgs() << "\nTo: ";
+          Result.getNode()->dump(&DAG);
+          dbgs() << '\n');
+    WorkListRemover DeadNodes(*this);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
+    removeFromWorkList(N);
+    DAG.DeleteNode(N);
+    AddToWorkList(Result.getNode());
+    return true;
+  }
+  return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+  // Set the instance variables, so that the various visit routines may use
+  // them.
+  Level = AtLevel;
+  LegalOperations = Level >= AfterLegalizeVectorOps;
+  LegalTypes = Level >= AfterLegalizeTypes;
+
+  // Add all the dag nodes to the worklist.
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I)
+    AddToWorkList(I);
+
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+  HandleSDNode Dummy(DAG.getRoot());
+
+  // The root of the dag may dangle to deleted nodes until the dag combiner is
+  // done.  Set it to null to avoid confusion.
+  DAG.setRoot(SDValue());
+
+  // While the worklist isn't empty, find a node and try to combine it.
+  while (!WorkListContents.empty()) {
+    SDNode *N;
+    // WorkListOrder holds the SDNodes in order, but it may contain
+    // duplicates.  To avoid a linear scan, WorkListContents holds what the
+    // worklist *should* contain; pop nodes off the order stack until we find
+    // one that should actually be visited.
+    do {
+      N = WorkListOrder.pop_back_val();
+    } while (!WorkListContents.erase(N));
+
+    // If N has no uses, it is dead. 
Make sure to revisit all N's operands once
+    // N is deleted from the DAG, since they too may now be dead or may have a
+    // reduced number of uses, allowing other xforms.
+    if (N->use_empty() && N != &Dummy) {
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        AddToWorkList(N->getOperand(i).getNode());
+
+      DAG.DeleteNode(N);
+      continue;
+    }
+
+    SDValue RV = combine(N);
+
+    if (RV.getNode() == 0)
+      continue;
+
+    ++NodesCombined;
+
+    // If we get back the same node we passed in, rather than a new node or
+    // zero, we know that the node must have defined multiple values and
+    // CombineTo was used.  Since CombineTo takes care of the worklist
+    // mechanics for us, we have no work to do in this case.
+    if (RV.getNode() == N)
+      continue;
+
+    assert(N->getOpcode() != ISD::DELETED_NODE &&
+           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+           "Node was deleted but visit returned new node!");
+
+    DEBUG(dbgs() << "\nReplacing.3 ";
+          N->dump(&DAG);
+          dbgs() << "\nWith: ";
+          RV.getNode()->dump(&DAG);
+          dbgs() << '\n');
+
+    // Transfer debug value.
+    DAG.TransferDbgValues(SDValue(N, 0), RV);
+    WorkListRemover DeadNodes(*this);
+    if (N->getNumValues() == RV.getNode()->getNumValues())
+      DAG.ReplaceAllUsesWith(N, RV.getNode());
+    else {
+      assert(N->getValueType(0) == RV.getValueType() &&
+             N->getNumValues() == 1 && "Type mismatch");
+      SDValue OpV = RV;
+      DAG.ReplaceAllUsesWith(N, &OpV);
+    }
+
+    // Push the new node and any users onto the worklist
+    AddToWorkList(RV.getNode());
+    AddUsersToWorkList(RV.getNode());
+
+    // Add any uses of the old node to the worklist in case this node is the
+    // last one that uses them.  They may become dead after this node is
+    // deleted.
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      AddToWorkList(N->getOperand(i).getNode());
+
+    // Finally, if the node is now dead, remove it from the graph.  The node
+    // may not be dead if the replacement process recursively simplified to
+    // something else needing this node.
+    if (N->use_empty()) {
+      // Nodes can be reintroduced into the worklist.  Make sure we do not
+      // process a node that has been replaced.
+      removeFromWorkList(N);
+
+      // Finally, since the node is now dead, remove it from the graph.
+      DAG.DeleteNode(N);
+    }
+  }
+
+  // If the root changed (e.g. it was a dead load), update the root.
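+  // Dummy was registered as a use of the root, so it has tracked every
+  // replacement of the root node during the loop above; its current value
+  // is therefore the up-to-date root.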
+ DAG.setRoot(Dummy.getValue()); + DAG.RemoveDeadNodes(); +} + +SDValue DAGCombiner::visit(SDNode *N) { + switch (N->getOpcode()) { + default: break; + case ISD::TokenFactor: return visitTokenFactor(N); + case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); + case ISD::ADD: return visitADD(N); + case ISD::SUB: return visitSUB(N); + case ISD::ADDC: return visitADDC(N); + case ISD::SUBC: return visitSUBC(N); + case ISD::ADDE: return visitADDE(N); + case ISD::SUBE: return visitSUBE(N); + case ISD::MUL: return visitMUL(N); + case ISD::SDIV: return visitSDIV(N); + case ISD::UDIV: return visitUDIV(N); + case ISD::SREM: return visitSREM(N); + case ISD::UREM: return visitUREM(N); + case ISD::MULHU: return visitMULHU(N); + case ISD::MULHS: return visitMULHS(N); + case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); + case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SMULO: return visitSMULO(N); + case ISD::UMULO: return visitUMULO(N); + case ISD::SDIVREM: return visitSDIVREM(N); + case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::AND: return visitAND(N); + case ISD::OR: return visitOR(N); + case ISD::XOR: return visitXOR(N); + case ISD::SHL: return visitSHL(N); + case ISD::SRA: return visitSRA(N); + case ISD::SRL: return visitSRL(N); + case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); + case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); + case ISD::CTPOP: return visitCTPOP(N); + case ISD::SELECT: return visitSELECT(N); + case ISD::SELECT_CC: return visitSELECT_CC(N); + case ISD::SETCC: return visitSETCC(N); + case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); + case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); + case ISD::ANY_EXTEND: return visitANY_EXTEND(N); + case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::TRUNCATE: return visitTRUNCATE(N); + case ISD::BITCAST: return visitBITCAST(N); + case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); + case ISD::FADD: return visitFADD(N); + case ISD::FSUB: return visitFSUB(N); + case ISD::FMUL: return visitFMUL(N); + case ISD::FMA: return visitFMA(N); + case ISD::FDIV: return visitFDIV(N); + case ISD::FREM: return visitFREM(N); + case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); + case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); + case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); + case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); + case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + case ISD::FP_ROUND: return visitFP_ROUND(N); + case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); + case ISD::FP_EXTEND: return visitFP_EXTEND(N); + case ISD::FNEG: return visitFNEG(N); + case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); + case ISD::BRCOND: return visitBRCOND(N); + case ISD::BR_CC: return visitBR_CC(N); + case ISD::LOAD: return visitLOAD(N); + case ISD::STORE: return visitSTORE(N); + case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); + case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); + case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); + case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::MEMBARRIER: return visitMEMBARRIER(N); + } + return SDValue(); +} + +SDValue DAGCombiner::combine(SDNode *N) { + SDValue RV = visit(N); + + // If nothing happened, try a 
target-specific DAG combine.
+  if (RV.getNode() == 0) {
+    assert(N->getOpcode() != ISD::DELETED_NODE &&
+           "Node was deleted but visit returned NULL!");
+
+    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+      // Expose the DAG combiner to the target combiner impls.
+      TargetLowering::DAGCombinerInfo
+        DagCombineInfo(DAG, Level, false, this);
+
+      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+    }
+  }
+
+  // If still nothing happened, try promoting the operation.
+  if (RV.getNode() == 0) {
+    switch (N->getOpcode()) {
+    default: break;
+    case ISD::ADD:
+    case ISD::SUB:
+    case ISD::MUL:
+    case ISD::AND:
+    case ISD::OR:
+    case ISD::XOR:
+      RV = PromoteIntBinOp(SDValue(N, 0));
+      break;
+    case ISD::SHL:
+    case ISD::SRA:
+    case ISD::SRL:
+      RV = PromoteIntShiftOp(SDValue(N, 0));
+      break;
+    case ISD::SIGN_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::ANY_EXTEND:
+      RV = PromoteExtend(SDValue(N, 0));
+      break;
+    case ISD::LOAD:
+      if (PromoteLoad(SDValue(N, 0)))
+        RV = SDValue(N, 0);
+      break;
+    }
+  }
+
+  // If N is a commutative binary node, try commuting it to enable more
+  // sdisel CSE.
+  if (RV.getNode() == 0 &&
+      SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+      N->getNumValues() == 1) {
+    SDValue N0 = N->getOperand(0);
+    SDValue N1 = N->getOperand(1);
+
+    // Constant operands are canonicalized to RHS.
+    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+      SDValue Ops[] = { N1, N0 };
+      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
+                                            Ops, 2);
+      if (CSENode)
+        return SDValue(CSENode, 0);
+    }
+  }
+
+  return RV;
+}
+
+/// getInputChainForNode - Given a node, return its input chain if it has one,
+/// otherwise return a null sd operand.
+static SDValue getInputChainForNode(SDNode *N) {
+  if (unsigned NumOps = N->getNumOperands()) {
+    if (N->getOperand(0).getValueType() == MVT::Other)
+      return N->getOperand(0);
+    else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+      return N->getOperand(NumOps-1);
+    for (unsigned i = 1; i < NumOps-1; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other)
+        return N->getOperand(i);
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+  // If N has two operands, where one has an input chain equal to the other,
+  // the 'other' chain is redundant.
+  if (N->getNumOperands() == 2) {
+    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+      return N->getOperand(0);
+    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+      return N->getOperand(1);
+  }
+
+  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
+  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
+  SmallPtrSet<SDNode*, 16> SeenOps;
+  bool Changed = false;             // If we should replace this token factor.
+
+  // Start out with this token factor.
+  TFs.push_back(N);
+
+  // Iterate through token factors.  The TFs list grows when new token factors
+  // are encountered.
+  for (unsigned i = 0; i < TFs.size(); ++i) {
+    SDNode *TF = TFs[i];
+
+    // Check each of the operands.
+    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+      SDValue Op = TF->getOperand(i);
+
+      switch (Op.getOpcode()) {
+      case ISD::EntryToken:
+        // Entry tokens don't need to be added to the list.  They are
+        // redundant.
+        Changed = true;
+        break;
+
+      case ISD::TokenFactor:
+        if (Op.hasOneUse() &&
+            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+          // Queue up for processing.
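+          // E.g. a chain like TokenFactor(TokenFactor(a, b), c) is pulled
+          // apart here and rebuilt below as the flat TokenFactor(a, b, c),
+          // provided the inner node has no other uses.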
+          TFs.push_back(Op.getNode());
+          // Clean up in case the token factor is removed.
+          AddToWorkList(Op.getNode());
+          Changed = true;
+          break;
+        }
+        // Fall thru
+
+      default:
+        // Only add if it isn't already in the list.
+        if (SeenOps.insert(Op.getNode()))
+          Ops.push_back(Op);
+        else
+          Changed = true;
+        break;
+      }
+    }
+  }
+
+  SDValue Result;
+
+  // If we've changed things around, replace the token factor.
+  if (Changed) {
+    if (Ops.empty()) {
+      // The entry token is the only possible outcome.
+      Result = DAG.getEntryNode();
+    } else {
+      // New and improved token factor.
+      Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                           MVT::Other, &Ops[0], Ops.size());
+    }
+
+    // Don't add users to work list.
+    return CombineTo(N, Result, false);
+  }
+
+  return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+  WorkListRemover DeadNodes(*this);
+  // Replacing results may cause a different MERGE_VALUES to suddenly
+  // be CSE'd with N, and carry its uses with it. Iterate until no
+  // uses remain, to ensure that the node can be safely deleted.
+  // First add the users of this node to the work list so that they
+  // can be tried again once they have new operands.
+  AddUsersToWorkList(N);
+  do {
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
+  } while (!N->use_empty());
+  removeFromWorkList(N);
+  DAG.DeleteNode(N);
+  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+}
+
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+                              SelectionDAG &DAG) {
+  EVT VT = N0.getValueType();
+  SDValue N00 = N0.getOperand(0);
+  SDValue N01 = N0.getOperand(1);
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+      isa<ConstantSDNode>(N00.getOperand(1))) {
+    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
+    N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+                     DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
+                                 N00.getOperand(0), N01),
+                     DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
+                                 N00.getOperand(1), N01));
+    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (add x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+  }
+
+  // fold (add x, undef) -> undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+  // fold (add c1, c2) -> c1+c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+  // fold (add x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (add Sym, c) -> Sym+c
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+        GA->getOpcode() == ISD::GlobalAddress)
+      return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+                                  GA->getOffset() +
(uint64_t)N1C->getSExtValue()); + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N1C && N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(N1C->getAPIntValue()+ + N0C->getAPIntValue(), VT), + N0.getOperand(1)); + // reassociate add + SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); + if (RADD.getNode() != 0) + return RADD; + // fold ((0-A) + B) -> B-A + if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && + cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); + // fold (A + (0-B)) -> A-B + if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && + cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); + // fold (A+(B-A)) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) + return N1.getOperand(0); + // fold ((B-A)+A) -> B + if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) + return N0.getOperand(0); + // fold (A+(B-(A+C))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(0)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + N1.getOperand(1).getOperand(1)); + // fold (A+(B-(C+A))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(1)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + N1.getOperand(1).getOperand(0)); + // fold (A+((B-A)+or-C)) to (B+or-C) + if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && + N1.getOperand(0).getOpcode() == ISD::SUB && + N0 == N1.getOperand(0).getOperand(1)) + return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT, + N1.getOperand(0).getOperand(0), N1.getOperand(1)); + + // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + + if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10), + DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11)); + } + + if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (a+b) -> (a|b) iff a and b share no bits. + if (VT.isInteger() && !VT.isVector()) { + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
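+      // E.g. in (add (and x, 0xF0), (and y, 0x0F)) no bit position can
+      // produce a carry, so the add can be rewritten as an or.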
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); + } + } + + // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) + if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { + SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG); + if (Result.getNode()) return Result; + } + if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { + SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG); + if (Result.getNode()) return Result; + } + + // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) + if (N1.getOpcode() == ISD::SHL && + N1.getOperand(0).getOpcode() == ISD::SUB) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) + if (C->getAPIntValue() == 0) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1))); + if (N0.getOpcode() == ISD::SHL && + N0.getOperand(0).getOpcode() == ISD::SUB) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) + if (C->getAPIntValue() == 0) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N0.getOperand(0).getOperand(1), + N0.getOperand(1))); + + if (N1.getOpcode() == ISD::AND) { + SDValue AndOp0 = N1.getOperand(0); + ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); + unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); + unsigned DestBits = VT.getScalarType().getSizeInBits(); + + // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) + // and similar xforms where the inner op is either ~0 or 0. + if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { + DebugLoc DL = N->getDebugLoc(); + return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); + } + } + + // add (sext i1), X -> sub X, (zext i1) + if (N0.getOpcode() == ISD::SIGN_EXTEND && + N0.getOperand(0).getValueType() == MVT::i1 && + !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { + DebugLoc DL = N->getDebugLoc(); + SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into an ADD. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Glue)); + + // canonicalize constant to RHS. + if (N0C && !N1C) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + // fold (addc x, 0) -> x + no carry out + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Glue)); + + // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) + return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Glue)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + N1, N0, CarryIn); + + // fold (adde x, y, false) -> (addc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); + + return SDValue(); +} + +// Since it may not be valid to emit a fold to zero for vector initializers +// check if we can before folding. +static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, bool LegalOperations) { + if (!VT.isVector()) { + return DAG.getConstant(0, VT); + } + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { + // Produce a vector of zeros. + SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + std::vector<SDValue> Ops(VT.getVectorNumElements(), El); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, + &Ops[0], Ops.size()); + } + return SDValue(); +} + +SDValue DAGCombiner::visitSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 : + dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (sub x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + } + + // fold (sub x, x) -> 0 + // FIXME: Refactor this and xor and other similar operations together. + if (N0 == N1) + return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); + // fold (sub c1, c2) -> c1-c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + // fold (sub x, c) -> (add x, -c) + if (N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), VT)); + // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1)
+  if (N0C && N0C->isAllOnesValue())
+    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+  // fold A-(A-B) -> B
+  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+    return N1.getOperand(1);
+  // fold (A+B)-A -> B
+  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+    return N0.getOperand(1);
+  // fold (A+B)-B -> A
+  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+    return N0.getOperand(0);
+  // fold C2-(A+C1) -> (C2-C1)-A
+  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
+                                   VT);
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
+                       N1.getOperand(0));
+  }
+  // fold ((A+(B+or-C))-B) -> A+or-C
+  if (N0.getOpcode() == ISD::ADD &&
+      (N0.getOperand(1).getOpcode() == ISD::SUB ||
+       N0.getOperand(1).getOpcode() == ISD::ADD) &&
+      N0.getOperand(1).getOperand(0) == N1)
+    return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
+                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
+  // fold ((A+(C+B))-B) -> A+C
+  if (N0.getOpcode() == ISD::ADD &&
+      N0.getOperand(1).getOpcode() == ISD::ADD &&
+      N0.getOperand(1).getOperand(1) == N1)
+    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
+  // fold ((A-(B-C))-C) -> A-B
+  if (N0.getOpcode() == ISD::SUB &&
+      N0.getOperand(1).getOpcode() == ISD::SUB &&
+      N0.getOperand(1).getOperand(1) == N1)
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
+
+  // If either operand of a sub is undef, the result is undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  // If the relocation model supports it, consider symbol offsets.
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+      // fold (sub Sym, c) -> Sym-c
+      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+        return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+                                    GA->getOffset() -
+                                    (uint64_t)N1C->getSExtValue());
+      // fold (sub Sym+c1, Sym+c2) -> c1-c2
+      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+        if (GA->getGlobal() == GB->getGlobal())
+          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+                                 VT);
+    }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // If the flag result is dead, turn this into a SUB.
+  if (!N->hasAnyUseOfValue(1))
+    return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
+                     DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+                                 MVT::Glue));
+
+  // fold (subc x, x) -> 0 + no borrow
+  if (N0 == N1)
+    return CombineTo(N, DAG.getConstant(0, VT),
+                     DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+                                 MVT::Glue));
+
+  // fold (subc x, 0) -> x + no borrow
+  if (N1C && N1C->isNullValue())
+    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+                                        MVT::Glue));
+
+  // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow + if (N0C && N0C->isAllOnesValue()) + return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUBE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (sube x, y, false) -> (subc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); + + return SDValue(); +} + +SDValue DAGCombiner::visitMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (mul c1, c2) -> c1*c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); + // fold (mul x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mul x, -1) -> 0-x + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // fold (mul x, (1 << c)) -> x << c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy(N0.getValueType()))); + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c + if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { + unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + // FIXME: If the input is something that is easily negated (e.g. a + // single-use add), we should put the negate there. + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(Log2Val, + getShiftAmountTy(N0.getValueType())))); + } + // (mul (shl X, c1), c2) -> (mul X, c2 << c1) + if (N1C && N0.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N0.getOperand(1))) { + SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N1, N0.getOperand(1)); + AddToWorkList(C3.getNode()); + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + N0.getOperand(0), C3); + } + + // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one + // use. + { + SDValue Sh(0,0), Y(0,0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
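+    // Hoisting the shift, e.g. (mul (shl x, 2), y) -> (shl (mul x, y), 2),
+    // keeps the algebra intact (both compute (x * y) << 2) and may let the
+    // target fold the outer shl into a scaled addressing mode.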
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getNode()->hasOneUse()) { + Sh = N0; Y = N1; + } else if (N1.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N1.getOperand(1)) && + N1.getNode()->hasOneUse()) { + Sh = N1; Y = N0; + } + + if (Sh.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + Mul, Sh.getOperand(1)); + } + } + + // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) + if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, + N0.getOperand(1), N1)); + + // reassociate mul + SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); + if (RMUL.getNode() != 0) + return RMUL; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + EVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sdiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + // fold (sdiv X, 1) -> X + if (N1C && N1C->getAPIntValue() == 1LL) + return N0; + // fold (sdiv X, -1) -> 0-X + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // If we know the sign bits of both operands are zero, strength reduce to a + // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), + N0, N1); + } + // fold (sdiv X, pow2) -> simple ops after legalize + if (N1C && !N1C->isNullValue() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { + // If dividing by powers of two is cheap, then don't perform the following + // fold. + if (TLI.isPow2DivCheap()) + return SDValue(); + + unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); + + // Splat the sign bit into the register + SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + DAG.getConstant(VT.getSizeInBits()-1, + getShiftAmountTy(N0.getValueType()))); + AddToWorkList(SGN.getNode()); + + // Add (N0 < 0) ? abs2 - 1 : 0; + SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, + DAG.getConstant(VT.getSizeInBits() - lg2, + getShiftAmountTy(SGN.getValueType()))); + SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); + AddToWorkList(SRL.getNode()); + AddToWorkList(ADD.getNode()); // Divide by pow2 + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, + DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (N1C->getAPIntValue().isNonNegative()) + return SRA; + + AddToWorkList(SRA.getNode()); + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), SRA); + } + + // if integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. 
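+ // A sketch of the alternate sequence (assuming the usual magic-number
+ // algorithm from Hacker's Delight; the exact sequence BuildSDIV emits is
+ // target-dependent): for i32 "sdiv x, 7",
+ //   t = mulhs(x, 0x92492493); t = add(t, x);
+ //   q = sra(t, 2);            q = add(q, srl(q, 31));
+ // e.g. x = -7: mulhs = 2, t = -5, sra = -2, and the sign correction
+ // yields -1, matching C division semantics.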
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + SDValue Op = BuildSDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + EVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (udiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + // fold (udiv x, (1 << c)) -> x >>u c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy(N0.getValueType()))); + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + EVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, + N1.getOperand(1), + DAG.getConstant(SHC->getAPIntValue() + .logBase2(), + ADDVT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); + } + } + } + // fold (udiv x, c) -> alternate + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + SDValue Op = BuildUDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitSREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N->getValueType(0); + + // fold (srem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
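+ // For example (a sketch): "srem x, 7" is rebuilt below as
+ //   d = sdiv x, 7; r = sub x, (mul d, 7)
+ // but only kept in this form when combine() actually simplifies the sdiv;
+ // otherwise the original srem node is left alone.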
+ if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N->getValueType(0); + + // fold (urem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue()-1,VT)); + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + VT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); + } + } + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. + if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHS(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + // fold (mulhs x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhs x, 1) -> (sra x, size(x)-1) + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, + DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + getShiftAmountTy(N0.getValueType()))); + // fold (mulhs x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + // If the type twice as wide is legal, transform the mulhs to a wider multiply + // plus a shift. 
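+ // e.g. (a sketch) for i16 when i32 MUL is legal:
+ //   mulhs(a, b) ==> trunc(i16, srl(mul(sext_i32(a), sext_i32(b)), 16))
+ // The logical shift is fine here because the truncate keeps only bits
+ // 31..16 of the double-width product.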
+ if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHU(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + // fold (mulhu x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhu x, 1) -> 0 + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getConstant(0, N0.getValueType()); + // fold (mulhu x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + + return SDValue(); +} + +/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that +/// compute two values. LoOp and HiOp give the opcodes for the two computations +/// that are being performed. Returns the simplified value if a simplification +/// was made, or a null SDValue otherwise. +/// +SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp) { + // If the high half is not needed, just compute the low half. + bool HiExists = N->hasAnyUseOfValue(1); + if (!HiExists && + (!LegalOperations || + TLI.isOperationLegal(LoOp, N->getValueType(0)))) { + SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + return CombineTo(N, Res, Res); + } + + // If the low half is not needed, just compute the high half. + bool LoExists = N->hasAnyUseOfValue(0); + if (!LoExists && + (!LegalOperations || + TLI.isOperationLegal(HiOp, N->getValueType(1)))) { + SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + return CombineTo(N, Res, Res); + } + + // If both halves are used, there is nothing to do. + if (LoExists && HiExists) + return SDValue(); + + // If the two computed results can be simplified separately, separate them.
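+ // e.g. (a sketch): for (smul_lohi x, y) the live half is recomputed below
+ // as a plain (mul x, y) or (mulhs x, y); if combine() folds that node
+ // further, CombineTo swaps it in, the other result being dead by this point.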
+ if (LoExists) { + SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Lo.getNode()); + SDValue LoOpt = combine(Lo.getNode()); + if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && + (!LegalOperations || + TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) + return CombineTo(N, LoOpt, LoOpt); + } + + if (HiExists) { + SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Hi.getNode()); + SDValue HiOpt = combine(Hi.getNode()); + if (HiOpt.getNode() && HiOpt != Hi && + (!LegalOperations || + TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) + return CombineTo(N, HiOpt, HiOpt); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); + if (Res.getNode()) return Res; + + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + // If the type twice as wide is legal, transform the smul_lohi to a wider + // multiply plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); + Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); + // Compute the high part (result 1) by shifting the product down. + Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, + DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); + // Compute the low part (result 0) by truncating the product. + Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); + return CombineTo(N, Lo, Hi); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); + if (Res.getNode()) return Res; + + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + // If the type twice as wide is legal, transform the umul_lohi to a wider + // multiply plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); + Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); + // Compute the high part (result 1) by shifting the product down. + Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, + DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); + // Compute the low part (result 0) by truncating the product.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); + return CombineTo(N, Lo, Hi); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSMULO(SDNode *N) { + // (smulo x, 2) -> (saddo x, x) + if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) + if (C2->getAPIntValue() == 2) + return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(), + N->getOperand(0), N->getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitUMULO(SDNode *N) { + // (umulo x, 2) -> (uaddo x, x) + if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) + if (C2->getAPIntValue() == 2) + return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(), + N->getOperand(0), N->getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with +/// two operands of the same opcode, try to simplify it. +SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + + // Bail early if none of these transforms apply. + if (N0.getNode()->getNumOperands() == 0) return SDValue(); + + // For each of OP in AND/OR/XOR: + // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) + // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) + // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + // + // do not sink logical op inside of a vector extend, since it may combine + // into a vsetcc. + EVT Op0VT = N0.getOperand(0).getValueType(); + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || + // Avoid infinite looping with PromoteIntBinOp. + (N0.getOpcode() == ISD::ANY_EXTEND && + (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || + (N0.getOpcode() == ISD::TRUNCATE && + (!TLI.isZExtFree(VT, Op0VT) || + !TLI.isTruncateFree(Op0VT, VT)) && + TLI.isTypeLegal(Op0VT))) && + !VT.isVector() && + Op0VT == N1.getOperand(0).getValueType() && + (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); + } + + // For each of OP in SHL/SRL/SRA/AND... 
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) + // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) + // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || + N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && + N0.getOperand(1) == N1.getOperand(1)) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ORNode, N0.getOperand(1)); + } + + // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) + // Only perform this optimization after type legalization and before + // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by + // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and + // we don't want to undo this promotion. + // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper + // on scalars. + if ((N0.getOpcode() == ISD::BITCAST || + N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && + Level == AfterLegalizeTypes) { + SDValue In0 = N0.getOperand(0); + SDValue In1 = N1.getOperand(0); + EVT In0Ty = In0.getValueType(); + EVT In1Ty = In1.getValueType(); + DebugLoc DL = N->getDebugLoc(); + // If both incoming values are integers and the original types are the + // same, perform the operation on the inputs and bitcast the result. + if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { + SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); + SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); + AddToWorkList(Op.getNode()); + return BC; + } + } + + // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). + // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) + // If both shuffles use the same mask, and both shuffle within a single + // vector, then it is worthwhile to move the swizzle after the operation. + // The type-legalizer generates this pattern when loading illegal + // vector types from memory. In many cases this allows additional shuffle + // optimizations. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N0.getOperand(1).getOpcode() == ISD::UNDEF && + N1.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); + + assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + "Inputs to shuffles are not the same type"); + + unsigned NumElts = VT.getVectorNumElements(); + + // Check that both shuffles use the same mask. The masks are known to be of + // the same length because the result vector type is the same.
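+ // e.g. (a sketch): xor(shuffle(A, undef, <1,0,3,2>),
+ //                      shuffle(B, undef, <1,0,3,2>))
+ //   -> shuffle(xor(A, B), undef, <1,0,3,2>), trading two shuffles for one.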
+ bool SameMask = true; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx0 = SVN0->getMaskElt(i); + int Idx1 = SVN1->getMaskElt(i); + if (Idx0 != Idx1) { + SameMask = false; + break; + } + } + + if (SameMask) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(Op.getNode()); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + DAG.getUNDEF(VT), &SVN0->getMask()[0]); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitAND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N1.getValueType(); + unsigned BitWidth = VT.getScalarType().getSizeInBits(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (and x, 0) -> 0, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N0; + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; + + // fold (and x, -1) -> x, vector edition + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllOnes(N1.getNode())) + return N0; + } + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (and c1, c2) -> c1&c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); + // fold (and x, -1) -> x + if (N1C && N1C->isAllOnesValue()) + return N0; + // if (and x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(BitWidth))) + return DAG.getConstant(0, VT); + // reassociate and + SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); + if (RAND.getNode() != 0) + return RAND; + // fold (and (or x, C), D) -> D if (C & D) == D + if (N1C && N0.getOpcode() == ISD::OR) + if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + return N1; + // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N0Op0 = N0.getOperand(0); + APInt Mask = ~N1C->getAPIntValue(); + Mask = Mask.trunc(N0Op0.getValueSizeInBits()); + if (DAG.MaskedValueIsZero(N0Op0, Mask)) { + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), + N0.getValueType(), N0Op0); + + // Replace uses of the AND with uses of the Zero extend node. + CombineTo(N, Zext); + + // We actually want to replace all uses of the any_extend with the + // zero_extend, to avoid duplicating things. This will later cause this + // AND to be folded. + CombineTo(N0.getNode(), Zext); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must + // already be zero by virtue of the width of the base type of the load. + // + // the 'X' node here can either be nothing or an extract_vector_elt to catch + // more cases. 
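+ // e.g. (a sketch): (and (extload i8->i32 p), 255) can drop the AND by
+ // turning the extload into a zextload: zero-extension already clears
+ // bits 31..8, so the mask becomes a no-op.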
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getOpcode() == ISD::LOAD) || + N0.getOpcode() == ISD::LOAD) { + LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? + N0 : N0.getOperand(0) ); + + // Get the constant (if applicable) the zero'th operand is being ANDed with. + // This can be a pure constant or a vector splat, in which case we treat the + // vector as a scalar and use the splat value. + APInt Constant = APInt::getNullValue(1); + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + Constant = C->getAPIntValue(); + } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs); + if (IsSplat) { + // Undef bits can contribute to a possible optimisation if set, so + // set them. + SplatValue |= SplatUndef; + + // The splat value may be something like "0x00FFFFFF", which means 0 for + // the first vector value and FF for the rest, repeating. We need a mask + // that will apply equally to all members of the vector, so AND all the + // lanes of the constant together. + EVT VT = Vector->getValueType(0); + unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + + // If the splat value has been compressed to a bitlength lower + // than the size of the vector lane, we need to re-expand it to + // the lane size. + if (BitWidth > SplatBitSize) + for (SplatValue = SplatValue.zextOrTrunc(BitWidth); + SplatBitSize < BitWidth; + SplatBitSize = SplatBitSize * 2) + SplatValue |= SplatValue.shl(SplatBitSize); + + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } + } + + // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is + // actually legal and isn't going to get expanded, else this is a false + // optimisation. + bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getMemoryVT()); + + // Resize the constant to the same size as the original memory access before + // extension. If it is still the AllOnesValue then this AND is completely + // unneeded. + Constant = + Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + + bool B; + switch (Load->getExtensionType()) { + default: B = false; break; + case ISD::EXTLOAD: B = CanZextLoadProfitably; break; + case ISD::ZEXTLOAD: + case ISD::NON_EXTLOAD: B = true; break; + } + + if (B && Constant.isAllOnesValue()) { + // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to + // preserve semantics once we get rid of the AND. + SDValue NewLoad(Load, 0); + if (Load->getExtensionType() == ISD::EXTLOAD) { + NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, + Load->getValueType(0), Load->getDebugLoc(), + Load->getChain(), Load->getBasePtr(), + Load->getOffset(), Load->getMemoryVT(), + Load->getMemOperand()); + // Replace uses of the EXTLOAD with the new ZEXTLOAD. + if (Load->getNumValues() == 3) { + // PRE/POST_INC loads have 3 values. + SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), + NewLoad.getValue(2) }; + CombineTo(Load, To, 3, true); + } else { + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } + } + + // Fold the AND away, taking care not to fold to the old load node if we + // replaced it. + CombineTo(N, (N0.getNode() == Load) ? 
NewLoad : N0); + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + TLI.getSetCCResultType(N0.getSimpleValueType()))))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (and (load x), 255) -> (zextload x, i8) + // fold (and (extload x, i16), 255) -> (zextload x, i8) + // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) + if (N1C && (N0.getOpcode() == ISD::LOAD || + (N0.getOpcode() == ISD::ANY_EXTEND && + N0.getOperand(0).getOpcode() == ISD::LOAD))) { + bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; + LoadSDNode *LN0 = HasAnyExt + ? cast<LoadSDNode>(N0.getOperand(0)) + : cast<LoadSDNode>(N0); + if (LN0->getExtensionType() != ISD::SEXTLOAD && + LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { + uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); + if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + EVT LoadedVT = LN0->getMemoryVT(); + + if (ExtVT == LoadedVT && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + + SDValue NewLoad = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + AddToWorkList(N); + CombineTo(LN0, NewLoad, NewLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + // Do not change the width of a volatile load. + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT PtrType = LN0->getOperand(1).getValueType(); + + unsigned Alignment = LN0->getAlignment(); + SDValue NewPtr = LN0->getBasePtr(); + + // For big endian targets, we need to add an offset to the pointer + // to load the correct bytes. For little endian systems, we merely + // need to read fewer bytes from the same pointer. + if (TLI.isBigEndian()) { + unsigned LVTStoreBytes = LoadedVT.getStoreSize(); + unsigned EVTStoreBytes = ExtVT.getStoreSize(); + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr, DAG.getConstant(PtrOff, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } + + AddToWorkList(NewPtr.getNode()); + + EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + LN0->getChain(), NewPtr, + LN0->getPointerInfo(), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + Alignment); + AddToWorkList(N); + CombineTo(LN0, Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + } + + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. + if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDValue NewAdd = + DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + N0.getOperand(0), DAG.getConstant(ADDC, VT)); + CombineTo(N0.getNode(), NewAdd); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + } + } + } + + return SDValue(); +} + +/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16 +/// +SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits) { + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) + return SDValue(); + if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) + bool LookPassAnd0 = false; + bool LookPassAnd1 = false; + if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) + std::swap(N0, N1); + if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() == ISD::AND) { + if (!N0.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!N01C || N01C->getZExtValue() != 0xFF00) + return SDValue(); + N0 = N0.getOperand(0); + LookPassAnd0 = true; + } + + if (N1.getOpcode() == ISD::AND) { + if (!N1.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!N11C || N11C->getZExtValue() != 0xFF) + return SDValue(); + N1 = N1.getOperand(0); + LookPassAnd1 = true; + } + + if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) + return SDValue(); + if (!N0.getNode()->hasOneUse() || + !N1.getNode()->hasOneUse()) + return SDValue(); + + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!N01C || !N11C) + return SDValue(); + if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) + return SDValue(); + + // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) + SDValue N00 = N0->getOperand(0); + if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { + if (!N00.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); + if (!N001C || N001C->getZExtValue() != 0xFF) + return SDValue(); +
N00 = N00.getOperand(0); + LookPassAnd0 = true; + } + + SDValue N10 = N1->getOperand(0); + if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { + if (!N10.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); + if (!N101C || N101C->getZExtValue() != 0xFF00) + return SDValue(); + N10 = N10.getOperand(0); + LookPassAnd1 = true; + } + + if (N00 != N10) + return SDValue(); + + // Make sure everything beyond the low halfword is zero since the SRL 16 + // will clear the top bits. + unsigned OpSizeInBits = VT.getSizeInBits(); + if (DemandHighBits && OpSizeInBits > 16 && + (!LookPassAnd0 || !LookPassAnd1) && + !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) + return SDValue(); + + SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00); + if (OpSizeInBits > 16) + Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res, + DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); + return Res; +} + +/// isBSwapHWordElement - Return true if the specified node is an element +/// that makes up a 32-bit packed halfword byteswap. i.e. +/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { + if (!N.getNode()->hasOneUse()) + return false; + + unsigned Opc = N.getOpcode(); + if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) + return false; + + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!N1C) + return false; + + unsigned Num; + switch (N1C->getZExtValue()) { + default: + return false; + case 0xFF: Num = 0; break; + case 0xFF00: Num = 1; break; + case 0xFF0000: Num = 2; break; + case 0xFF000000: Num = 3; break; + } + + // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). + SDValue N0 = N.getOperand(0); + if (Opc == ISD::AND) { + if (Num == 0 || Num == 2) { + // (x >> 8) & 0xff + // (x >> 8) & 0xff0000 + if (N0.getOpcode() != ISD::SRL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { + // (x << 8) & 0xff00 + // (x << 8) & 0xff000000 + if (N0.getOpcode() != ISD::SHL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + } else if (Opc == ISD::SHL) { + // (x & 0xff) << 8 + // (x & 0xff0000) << 8 + if (Num != 0 && Num != 2) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { // Opc == ISD::SRL + // (x & 0xff00) >> 8 + // (x & 0xff000000) >> 8 + if (Num != 1 && Num != 3) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + + if (Parts[Num]) + return false; + + Parts[Num] = N0.getOperand(0).getNode(); + return true; +} + +/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. 
That is +/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +/// => (rotl (bswap x), 16) +SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + SmallVector<SDNode*,4> Parts(4, (SDNode*)0); + // Look for either + // (or (or (and), (and)), (or (and), (and))) + // (or (or (or (and), (and)), (and)), (and)) + if (N0.getOpcode() != ISD::OR) + return SDValue(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + + if (N1.getOpcode() == ISD::OR && + N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { + // (or (or (and), (and)), (or (and), (and))) + SDValue N000 = N00.getOperand(0); + if (!isBSwapHWordElement(N000, Parts)) + return SDValue(); + + SDValue N001 = N00.getOperand(1); + if (!isBSwapHWordElement(N001, Parts)) + return SDValue(); + SDValue N010 = N01.getOperand(0); + if (!isBSwapHWordElement(N010, Parts)) + return SDValue(); + SDValue N011 = N01.getOperand(1); + if (!isBSwapHWordElement(N011, Parts)) + return SDValue(); + } else { + // (or (or (or (and), (and)), (and)), (and)) + if (!isBSwapHWordElement(N1, Parts)) + return SDValue(); + if (!isBSwapHWordElement(N01, Parts)) + return SDValue(); + if (N00.getOpcode() != ISD::OR) + return SDValue(); + SDValue N000 = N00.getOperand(0); + if (!isBSwapHWordElement(N000, Parts)) + return SDValue(); + SDValue N001 = N00.getOperand(1); + if (!isBSwapHWordElement(N001, Parts)) + return SDValue(); + } + + // Make sure the parts are all coming from the same node. + if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) + return SDValue(); + + SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, + SDValue(Parts[0],0)); + + // Result of the bswap should be rotated by 16. If it's not legal, then + // do (x << 16) | (x >> 16). + SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); + if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt)); +} + +SDValue DAGCombiner::visitOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N1.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (or x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + + // fold (or x, -1) -> -1, vector edition + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N0; + if (ISD::isBuildVectorAllOnes(N1.getNode())) + return N1; + } + + // fold (or x, undef) -> -1 + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + EVT EltVT = VT.isVector() ?
VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + } + // fold (or c1, c2) -> c1|c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0); + // fold (or x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (or x, -1) -> -1 + if (N1C && N1C->isAllOnesValue()) + return N1; + // fold (or x, c) -> c iff (x & ~c) == 0 + if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) + return N1; + + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) + SDValue BSwap = MatchBSwapHWord(N, N0, N1); + if (BSwap.getNode() != 0) + return BSwap; + BSwap = MatchBSwapHWordLow(N, N0, N1); + if (BSwap.getNode() != 0) + return BSwap; + + // reassociate or + SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); + if (ROR.getNode() != 0) + return ROR; + // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) + // iff (c1 & c2) == 0. + if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); + if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + TLI.getSetCCResultType(N0.getValueType()))))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 
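+ // e.g. (a sketch): (or (and x, 0xFF00), (and y, 0x00FF)) becomes
+ // (and (or x, y), 0xFFFF) when x's bits under 0x00FF and y's bits under
+ // 0xFF00 are known zero, so widening each mask lets no stray bits through.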
+ if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N1.getOperand(1).getOpcode() == ISD::Constant && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + const APInt &RHSMask = + cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, + DAG.getConstant(LHSMask | RHSMask, VT)); + } + } + + // See if this is some rotate idiom. + if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) + return SDValue(Rot, 0); + + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. +static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { + if (Op.getOpcode() == ISD::AND) { + if (isa<ConstantSDNode>(Op.getOperand(1))) { + Mask = Op.getOperand(1); + Op = Op.getOperand(0); + } else { + return false; + } + } + + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { + Shift = Op; + return true; + } + + return false; +} + +// MatchRotate - Handle an 'or' of two operands. If this is one of the many +// idioms for rotate, and if the target supports rotation instructions, generate +// a rot[lr]. +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { + // Must be a legal type. Expanded 'n promoted things won't work with rotates. + EVT VT = LHS.getValueType(); + if (!TLI.isTypeLegal(VT)) return 0; + + // The target must have at least one rotate flavor. + bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); + bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + if (!HasROTL && !HasROTR) return 0; + + // Match "(X shl/srl V1) & V2" where V2 may not be present. + SDValue LHSShift; // The shift. + SDValue LHSMask; // AND value if any. + if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) + return 0; // Not part of a rotate. + + SDValue RHSShift; // The shift. + SDValue RHSMask; // AND value if any. + if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) + return 0; // Not part of a rotate. + + if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) + return 0; // Not shifting the same value. + + if (LHSShift.getOpcode() == RHSShift.getOpcode()) + return 0; // Shifts must disagree. + + // Canonicalize shl to left side in a shl/srl pair. 
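+ // e.g. (a sketch), once the shl is canonicalized to the left: for i32,
+ //   (or (shl x, 8), (srl x, 24)) -> (rotl x, 8)  [or (rotr x, 24)]
+ // because the constant shift amounts sum to the operand width.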
+ if (RHSShift.getOpcode() == ISD::SHL) { + std::swap(LHS, RHS); + std::swap(LHSShift, RHSShift); + std::swap(LHSMask , RHSMask ); + } + + unsigned OpSizeInBits = VT.getSizeInBits(); + SDValue LHSShiftArg = LHSShift.getOperand(0); + SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftAmt = RHSShift.getOperand(1); + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + if (LHSShiftAmt.getOpcode() == ISD::Constant && + RHSShiftAmt.getOpcode() == ISD::Constant) { + uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != OpSizeInBits) + return 0; + + SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); + + // If there is an AND of either shifted operand, apply it to the result. + if (LHSMask.getNode() || RHSMask.getNode()) { + APInt Mask = APInt::getAllOnesValue(OpSizeInBits); + + if (LHSMask.getNode()) { + APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); + Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; + } + if (RHSMask.getNode()) { + APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); + Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; + } + + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + } + + return Rot.getNode(); + } + + // If there is a mask here, and we have a variable shift, we can't be sure + // that we're masking out the right stuff. + if (LHSMask.getNode() || RHSMask.getNode()) + return 0; + + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) + if (RHSShiftAmt.getOpcode() == ISD::SUB && + LHSShiftAmt == RHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } + } + } + + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) + if (LHSShiftAmt.getOpcode() == ISD::SUB && + RHSShiftAmt == LHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, + HasROTR ? 
RHSShiftAmt : LHSShiftAmt).getNode(); + } + } + } + + // Look for sign/zext/any-extended or truncate cases: + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { + SDValue LExtOp0 = LHSShiftAmt.getOperand(0); + SDValue RExtOp0 = RHSShiftAmt.getOperand(0); + if (RExtOp0.getOpcode() == ISD::SUB && + RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } + } + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotr x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, + LHSShiftArg, + HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); + } + } + } + } + + return 0; +} + +SDValue DAGCombiner::visitXOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LHS, RHS, CC; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (xor x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + } + + // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
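+ // (Rationale, a sketch: "xor r, r" is a common way to zero a register;
+ // when r is undef, folding to 0 preserves the intended zeroing instead of
+ // propagating undef.)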
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (xor x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (xor c1, c2) -> c1^c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold (xor x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // reassociate xor + SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1); + if (RXOR.getNode() != 0) + return RXOR; + + // fold !(x cc y) -> (x !cc y) + if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { + bool isInt = LHS.getValueType().isInteger(); + ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + isInt); + + if (!LegalOperations || + TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { + switch (N0.getOpcode()) { + default: + llvm_unreachable("Unhandled SetCC Equivalent!"); + case ISD::SETCC: + return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); + case ISD::SELECT_CC: + return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2), + N0.getOperand(3), NotCC); + } + } + } + + // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) + if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getNode()->hasOneUse() && + isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ + SDValue V = N0.getOperand(0); + V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, + DAG.getConstant(1, V.getValueType())); + AddToWorkList(V.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); + } + + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc + if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + } + } + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants + if (N1C && N1C->isAllOnesValue() && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + } + } + // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) + if (N1C && N0.getOpcode() == ISD::XOR) { + ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (N00C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1), + DAG.getConstant(N1C->getAPIntValue() ^ + N00C->getAPIntValue(), VT)); + if (N01C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(N1C->getAPIntValue() ^ + N01C->getAPIntValue(), VT)); + } + // fold (xor x, x) -> 0 + if (N0 == N1) + return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); + + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // Simplify the expression using non-local knowledge. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// visitShiftByConstant - Handle transforms common to the three shifts, when +/// the shift amount is a constant. +SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { + SDNode *LHS = N->getOperand(0).getNode(); + if (!LHS->hasOneUse()) return SDValue(); + + // We want to pull some binops through shifts, so that we have (and (shift)) + // instead of (shift (and)), likewise for add, or, xor, etc. This sort of + // thing happens with address calculations, so it's important to canonicalize + // it. + bool HighBitSet = false; // Can we transform this if the high bit is set? + + switch (LHS->getOpcode()) { + default: return SDValue(); + case ISD::OR: + case ISD::XOR: + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + case ISD::AND: + HighBitSet = true; // We can only transform sra if the high bit is set. + break; + case ISD::ADD: + if (N->getOpcode() != ISD::SHL) + return SDValue(); // only shl(add) not sr[al](add). + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + } + + // We require the RHS of the binop to be a constant as well. + ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); + + // FIXME: disable this unless the input to the binop is a shift by a constant. + // If it is not a shift, it pessimizes some common cases like: + // + // void foo(int *X, int i) { X[i & 1235] = 1; } + // int bar(int *X, int i) { return X[i & 255]; } + SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); + if ((BinOpLHSVal->getOpcode() != ISD::SHL && + BinOpLHSVal->getOpcode() != ISD::SRA && + BinOpLHSVal->getOpcode() != ISD::SRL) || + !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) + return SDValue(); + + EVT VT = N->getValueType(0); + + // If this is a signed shift right, and the high bit is modified by the + // logical operation, do not perform the transformation. The highBitSet + // boolean indicates the value of the high bit of the constant which would + // cause it to be modified for this operation. 
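+  // For example, (sra (or x, 0x80000000), 4) is left alone: the OR constant
+  // has its sign bit set, so BinOpRHSSignSet != HighBitSet below and we
+  // conservatively give up.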
+  if (N->getOpcode() == ISD::SRA) {
+    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+    if (BinOpRHSSignSet != HighBitSet)
+      return SDValue();
+  }
+
+  // Fold the constants, shifting the binop RHS by the shift amount.
+  SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+                               N->getValueType(0),
+                               LHS->getOperand(1), N->getOperand(1));
+
+  // Create the new shift.
+  SDValue NewShift = DAG.getNode(N->getOpcode(),
+                                 LHS->getOperand(0).getDebugLoc(),
+                                 VT, LHS->getOperand(0), N->getOperand(1));
+
+  // Create the new binop.
+  return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+  // fold (shl c1, c2) -> c1<<c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+  // fold (shl 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (shl x, c >= size(x)) -> undef
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+    return DAG.getUNDEF(VT);
+  // fold (shl x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (shl undef, x) -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // if (shl x, c) is known to be zero, return 0
+  if (DAG.MaskedValueIsZero(SDValue(N, 0),
+                            APInt::getAllOnesValue(OpSizeInBits)))
+    return DAG.getConstant(0, VT);
+  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND &&
+      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+      EVT TruncVT = N1.getValueType();
+      SDValue N100 = N1.getOperand(0).getOperand(0);
+      APInt TruncC = N101C->getAPIntValue();
+      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+                                     DAG.getNode(ISD::TRUNCATE,
+                                                 N->getDebugLoc(),
+                                                 TruncVT, N100),
+                                     DAG.getConstant(TruncC, TruncVT)));
+    }
+  }
+
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SHL &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    if (c1 + c2 >= OpSizeInBits)
+      return DAG.getConstant(0, VT);
+    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  }
+
+  // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
+  // For this to be valid, the second form must not preserve any of the bits
+  // that are shifted out by the inner shift in the first form.  This means
+  // the outer shift size must be >= the number of bits added by the ext.
+  // As a corollary, we don't care what kind of ext it is.
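+  // For example, with x: i16, (shl (zext (shl x, 3)), 17) -> (shl (zext x), 20):
+  // the outer shift amount (17) is at least the 16 bits added by the zext, so
+  // every bit the inner i16 shift discards is also shifted out of the i32
+  // result.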
+  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+              N0.getOpcode() == ISD::ANY_EXTEND ||
+              N0.getOpcode() == ISD::SIGN_EXTEND) &&
+      N0.getOperand(0).getOpcode() == ISD::SHL &&
+      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+    uint64_t c1 =
+      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    EVT InnerShiftVT = N0.getOperand(0).getValueType();
+    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+    if (c2 >= OpSizeInBits - InnerShiftSize) {
+      if (c1 + c2 >= OpSizeInBits)
+        return DAG.getConstant(0, VT);
+      return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+                         DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+                                     N0.getOperand(0)->getOperand(0)),
+                         DAG.getConstant(c1 + c2, N1.getValueType()));
+    }
+  }
+
+  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+  //                               (and (srl x, (sub c1, c2)), MASK)
+  // Only fold this if the inner shift has no other uses -- if it does, folding
+  // this will increase the total number of instructions.
+  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+    if (c1 < VT.getSizeInBits()) {
+      uint64_t c2 = N1C->getZExtValue();
+      APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+                                         VT.getSizeInBits() - c1);
+      SDValue Shift;
+      if (c2 > c1) {
+        Mask = Mask.shl(c2-c1);
+        Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+                            DAG.getConstant(c2-c1, N1.getValueType()));
+      } else {
+        Mask = Mask.lshr(c1-c2);
+        Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+                            DAG.getConstant(c1-c2, N1.getValueType()));
+      }
+      return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
+                         DAG.getConstant(Mask, VT));
+    }
+  }
+  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+    SDValue HiBitsMask =
+      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+                                            VT.getSizeInBits() -
+                                              N1C->getZExtValue()),
+                      VT);
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       HiBitsMask);
+  }
+
+  if (N1C) {
+    SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+    if (NewSHL.getNode())
+      return NewSHL;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+  // fold (sra c1, c2) -> c1 >>s c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+  // fold (sra 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (sra -1, x) -> -1
+  if (N0C && N0C->isAllOnesValue())
+    return N0;
+  // fold (sra x, c >= size(x)) -> undef
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+    return DAG.getUNDEF(VT);
+  // fold (sra x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (sra (shl x, c1), c1) -> sext_inreg, when the target supports
+  // sext_inreg.
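+  // e.g. on i32, (sra (shl x, 24), 24) -> (sext_inreg x, i8): both forms
+  // sign-extend the low 8 bits of x into the full 32-bit result.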
+  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+    if (VT.isVector())
+      ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               ExtVT, VT.getVectorNumElements());
+    if ((!LegalOperations ||
+         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+                         N0.getOperand(0), DAG.getValueType(ExtVT));
+  }
+
+  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SRA) {
+    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+                         DAG.getConstant(Sum, N1C->getValueType(0)));
+    }
+  }
+
+  // fold (sra (shl X, m), (sub result_size, n))
+  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
+  //   result_size - n != m.
+  // If truncate is free for the target, sext(shl) is likely to result in
+  // better code.
+  if (N0.getOpcode() == ISD::SHL) {
+    // Get the two constants of the shifts, CN0 = m, CN = n.
+    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N01C && N1C) {
+      // Determine what the truncate's result bitsize and type would be.
+      EVT TruncVT =
+        EVT::getIntegerVT(*DAG.getContext(),
+                          OpSizeInBits - N1C->getZExtValue());
+      // Determine the residual right-shift amount.
+      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+      // If the shift is not a no-op (in which case this should be just a sign
+      // extend already), the type truncated to is legal, sign_extend is legal
+      // on that type, and the truncate to that type is both legal and free,
+      // perform the transform.
+      if ((ShiftAmt > 0) &&
+          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+          TLI.isTruncateFree(VT, TruncVT)) {
+
+        SDValue Amt = DAG.getConstant(ShiftAmt,
+            getShiftAmountTy(N0.getOperand(0).getValueType()));
+        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+                                    N0.getOperand(0), Amt);
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+                                    Shift);
+        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+                           N->getValueType(0), Trunc);
+      }
+    }
+  }
+
+  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
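+  // This is safe because truncation distributes over AND -- trunc(and y, c)
+  // equals and(trunc y, trunc c) -- and the rewritten form exposes the
+  // truncated constant to further combines.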
+ if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND && + N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { + SDValue N101 = N1.getOperand(0).getOperand(1); + if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { + EVT TruncVT = N1.getValueType(); + SDValue N100 = N1.getOperand(0).getOperand(0); + APInt TruncC = N101C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); + return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::AND, N->getDebugLoc(), + TruncVT, + DAG.getNode(ISD::TRUNCATE, + N->getDebugLoc(), + TruncVT, N100), + DAG.getConstant(TruncC, TruncVT))); + } + } + + // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) + // if c1 is equal to the number of bits the trunc removes + if (N0.getOpcode() == ISD::TRUNCATE && + (N0.getOperand(0).getOpcode() == ISD::SRL || + N0.getOperand(0).getOpcode() == ISD::SRA) && + N0.getOperand(0).hasOneUse() && + N0.getOperand(0).getOperand(1).hasOneUse() && + N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { + EVT LargeVT = N0.getOperand(0).getValueType(); + ConstantSDNode *LargeShiftAmt = + cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); + + if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == + LargeShiftAmt->getZExtValue()) { + SDValue Amt = + DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), + getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, + N0.getOperand(0).getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); + } + } + + // Simplify, based on bits shifted out of the LHS. + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + + // If the sign bit is known to be zero, switch this to a SRL. 
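+  // (With a zero sign bit, sra and srl shift in the same zero bits; srl is
+  // the more canonical form for later combines.)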
+ if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); + + if (N1C) { + SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSRA.getNode()) + return NewSRA; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSRL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + + // fold (srl c1, c2) -> c1 >>u c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); + // fold (srl 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (srl x, c >= size(x)) -> undef + if (N1C && N1C->getZExtValue() >= OpSizeInBits) + return DAG.getUNDEF(VT); + // fold (srl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (srl x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(OpSizeInBits))) + return DAG.getConstant(0, VT); + + // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + + // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) + if (N1C && N0.getOpcode() == ISD::TRUNCATE && + N0.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + // This is only valid if the OpSizeInBits + c1 = size of inner shift. + if (c1 + OpSizeInBits == InnerShiftSize) { + if (c1 + c2 >= InnerShiftSize) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, + DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, + N0.getOperand(0)->getOperand(0), + DAG.getConstant(c1 + c2, ShiftCountVT))); + } + } + + // fold (srl (shl x, c), c) -> (and x, cst2) + if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && + N0.getValueSizeInBits() <= 64) { + uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, VT)); + } + + + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + // Shifting in all undef bits? + EVT SmallVT = N0.getOperand(0).getValueType(); + if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) + return DAG.getUNDEF(VT); + + if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + uint64_t ShiftAmt = N1C->getZExtValue(); + SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + N0.getOperand(0), + DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); + AddToWorkList(SmallShift.getNode()); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + } + } + + // fold (srl (sra X, Y), 31) -> (srl X, 31). 
+  // This srl only looks at the sign bit, which is unmodified by sra.
+  if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+    if (N0.getOpcode() == ISD::SRA)
+      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+  }
+
+  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
+  if (N1C && N0.getOpcode() == ISD::CTLZ &&
+      N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+    APInt KnownZero, KnownOne;
+    DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
+
+    // If any of the input bits are KnownOne, then the input couldn't be all
+    // zeros, thus the result of the srl will always be zero.
+    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+    // If all of the bits input to the ctlz node are known to be zero, then
+    // the result of the ctlz is "32" and the result of the shift is one.
+    APInt UnknownBits = ~KnownZero;
+    if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+    // Otherwise, check to see if there is exactly one bit input to the ctlz.
+    if ((UnknownBits & (UnknownBits - 1)) == 0) {
+      // Okay, we know that only the single bit specified by UnknownBits
+      // could be set on input to the CTLZ node.  If this bit is set, the SRL
+      // will return 0, if it is clear, it returns 1.  Change the CTLZ/SRL pair
+      // to an SRL/XOR pair, which is likely to simplify more.
+      unsigned ShAmt = UnknownBits.countTrailingZeros();
+      SDValue Op = N0.getOperand(0);
+
+      if (ShAmt) {
+        Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+                  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
+        AddToWorkList(Op.getNode());
+      }
+
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+                         Op, DAG.getConstant(1, VT));
+    }
+  }
+
+  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND &&
+      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+      EVT TruncVT = N1.getValueType();
+      SDValue N100 = N1.getOperand(0).getOperand(0);
+      APInt TruncC = N101C->getAPIntValue();
+      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+                         DAG.getNode(ISD::AND, N->getDebugLoc(),
+                                     TruncVT,
+                                     DAG.getNode(ISD::TRUNCATE,
+                                                 N->getDebugLoc(),
+                                                 TruncVT, N100),
+                                     DAG.getConstant(TruncC, TruncVT)));
+    }
+  }
+
+  // fold operands of srl based on knowledge that the low bits are not
+  // demanded.
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  if (N1C) {
+    SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+    if (NewSRL.getNode())
+      return NewSRL;
+  }
+
+  // Attempt to convert a srl of a load into a narrower zero-extending load.
+  SDValue NarrowLoad = ReduceLoadWidth(N);
+  if (NarrowLoad.getNode())
+    return NarrowLoad;
+
+  // Here is a common situation. We want to optimize:
+  //
+  //   %a = ...
+  //   %b = and i32 %a, 2
+  //   %c = srl i32 %b, 1
+  //   brcond i32 %c ...
+  //
+  // into
+  //
+  //   %a = ...
+  //   %b = and %a, 2
+  //   %c = setcc eq %b, 0
+  //   brcond %c ...
+  //
+  // However, after the source operand of the SRL has been optimized into an
+  // AND, the SRL itself may not be optimized further. Look for it and add the
+  // BRCOND into the worklist.
+  if (N->hasOneUse()) {
+    SDNode *Use = *N->use_begin();
+    if (Use->getOpcode() == ISD::BRCOND)
+      AddToWorkList(Use);
+    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+      // Also look past the truncate.
+ Use = *Use->use_begin(); + if (Use->getOpcode() == ISD::BRCOND) + AddToWorkList(Use); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitCTLZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz_zero_undef c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz_zero_undef c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTPOP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctpop c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + EVT VT = N->getValueType(0); + EVT VT0 = N0.getValueType(); + + // fold (select C, X, X) -> X + if (N1 == N2) + return N1; + // fold (select true, X, Y) -> X + if (N0C && !N0C->isNullValue()) + return N1; + // fold (select false, X, Y) -> Y + if (N0C && N0C->isNullValue()) + return N2; + // fold (select C, 1, X) -> (or C, X) + if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + // fold (select C, 0, 1) -> (xor C, 1) + if (VT.isInteger() && + (VT0 == MVT::i1 || + (VT0.isInteger() && + TLI.getBooleanContents(false) == + TargetLowering::ZeroOrOneBooleanContent)) && + N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { + SDValue XORNode; + if (VT == VT0) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + AddToWorkList(XORNode.getNode()); + if (VT.bitsGT(VT0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode); + } + // fold (select C, 0, X) -> (and (not C), X) + if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + AddToWorkList(NOTNode.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2); + } + // fold (select C, X, 1) -> (or (not C), X) + if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + AddToWorkList(NOTNode.getNode()); + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1); + } + // fold (select C, X, 0) -> (and C, X) + if (VT == MVT::i1 && N2C && N2C->isNullValue()) + return 
DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+  // fold (select X, X, Y) -> (or X, Y)
+  // fold (select X, 1, Y) -> (or X, Y)
+  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+  // fold (select X, Y, X) -> (and X, Y)
+  // fold (select X, Y, 0) -> (and X, Y)
+  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N1, N2))
+    return SDValue(N, 0);  // Don't revisit N.
+
+  // fold selects based on a setcc into other things, such as min/max/abs
+  if (N0.getOpcode() == ISD::SETCC) {
+    // FIXME:
+    // Check against MVT::Other for SELECT_CC, which is a workaround for
+    // targets having to say they don't support SELECT_CC on every type the
+    // DAG knows about, since there is no way to mark an opcode illegal at
+    // all value types.
+    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+                         N0.getOperand(0), N0.getOperand(1),
+                         N1, N2, N0.getOperand(2));
+    return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+  SDValue N3 = N->getOperand(3);
+  SDValue N4 = N->getOperand(4);
+  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+  // fold select_cc lhs, rhs, x, x, cc -> x
+  if (N2 == N3)
+    return N2;
+
+  // Determine if the condition we're dealing with is constant.
+  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+                              N0, N1, CC, N->getDebugLoc(), false);
+  if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+    if (!SCCC->isNullValue())
+      return N2;    // cond always true -> true val
+    else
+      return N3;    // cond always false -> false val
+  }
+
+  // Fold to a simpler select_cc
+  if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+                       SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+                       SCC.getOperand(2));
+
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N2, N3))
+    return SDValue(N, 0);  // Don't revisit N.
+
+  // fold select_cc into other things, such as min/max/abs
+  return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
+                       N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if extensions are possible and the
+// above-mentioned transformation is profitable.
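+// For example, if (load x) is also used by (setcc (load x), 0, eq), the
+// setcc can be rewritten to compare the extended value against the extended
+// constant, so the original narrow load dies and only the extending load
+// remains.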
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, + unsigned ExtOpc, + SmallVector<SDNode*, 4> &ExtendNodes, + const TargetLowering &TLI) { + bool HasCopyToRegUses = false; + bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); + for (SDNode::use_iterator UI = N0.getNode()->use_begin(), + UE = N0.getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User == N) + continue; + if (UI.getUse().getResNo() != N0.getResNo()) + continue; + // FIXME: Only extend SETCC N, N and SETCC N, c for now. + if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { + ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); + if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) + // Sign bits will be lost after a zext. + return false; + bool Add = false; + for (unsigned i = 0; i != 2; ++i) { + SDValue UseOp = User->getOperand(i); + if (UseOp == N0) + continue; + if (!isa<ConstantSDNode>(UseOp)) + return false; + Add = true; + } + if (Add) + ExtendNodes.push_back(User); + continue; + } + // If truncates aren't free and there are users we can't + // extend, it isn't worthwhile. + if (!isTruncFree) + return false; + // Remember if this value is live-out. + if (User->getOpcode() == ISD::CopyToReg) + HasCopyToRegUses = true; + } + + if (HasCopyToRegUses) { + bool BothLiveOut = false; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { + BothLiveOut = true; + break; + } + } + if (BothLiveOut) + // Both unextended and extended values are live out. There had better be + // a good reason for the transformation. + return ExtendNodes.size(); + } + return true; +} + +void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType) { + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), + &Ops[0], Ops.size())); + } +} + +SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (sext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); + + // fold (sext (sext x)) -> (sext x) + // fold (sext (aext x)) -> (sext x) + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)); + + if (N0.getOpcode() == ISD::TRUNCATE) { + // fold (sext (truncate (load x))) -> (sext (smaller load x)) + // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + + // See if the value being truncated is already sign extended. If so, just + // eliminate the trunc/sext pair. + SDValue Op = N0.getOperand(0); + unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); + unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); + unsigned DestBits = VT.getScalarType().getSizeInBits(); + unsigned NumSignBits = DAG.ComputeNumSignBits(Op); + + if (OpBits == DestBits) { + // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign + // bits, it is already ready. + if (NumSignBits > DestBits-MidBits) + return Op; + } else if (OpBits < DestBits) { + // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign + // bits, just sext from i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op); + } else { + // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign + // bits, just truncate to i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + } + + // fold (sext (truncate x)) -> (sextinreg x). + if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, + N0.getValueType())) { + if (OpBits < DestBits) + Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); + else if (OpBits > DestBits) + Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, + DAG.getValueType(N0.getValueType())); + } + } + + // fold (sext (load x)) -> (sext (truncate (sextload x))) + // None of the supported targets knows how to perform load and sign extend + // on vectors in one instruction. We only perform this transformation on + // scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getPointerInfo(), + N0.getValueType(), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) + // fold (sext ( extload x)) -> (sext (truncate (sextload x))) + if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + + // fold (sext (and/or/xor (load x), cst)) -> + // (and/or/xor (sextload x), (sext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::ZEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.sext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + + if (N0.getOpcode() == ISD::SETCC) { + // sext(setcc) -> sext_in_reg(vsetcc) for vectors. + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N0VT = N0.getOperand(0).getValueType(); + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = TLI.getSetCCResultType(N0VT); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
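+      // e.g. (sext (setcc v4i32 a, b, cc)) to v4i32: if the target's vector
+      // setcc already yields an all-ones/all-zeros v4i32 mask, that mask is
+      // exactly the sign-extended result, so the setcc can simply be emitted
+      // in VT.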
+ if (VT.getSizeInBits() == SVT.getSizeInBits()) + return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } + } + + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + unsigned ElementWidth = VT.getScalarType().getSizeInBits(); + SDValue NegOne = + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + NegOne, DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + if (!VT.isVector() && (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))) + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + DAG.getSetCC(N->getDebugLoc(), + TLI.getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + NegOne, DAG.getConstant(0, VT)); + } + + // fold (sext x) -> (zext x) if the sign bit is known zero. + if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && + DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +// isTruncateOf - If N is a truncate of some other value, return true, record +// the value being truncated in Op and which of Op's bits are zero in KnownZero. +// This function computes KnownZero to avoid a duplicated call to +// ComputeMaskedBits in the caller. 
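+// Besides a plain TRUNCATE, this also recognizes (setcc x, 0, ne) with an i1
+// result where x is known zero in all bits but bit 0; such a setcc behaves
+// exactly like a truncate of x to i1.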
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, + APInt &KnownZero) { + APInt KnownOne; + if (N->getOpcode() == ISD::TRUNCATE) { + Op = N->getOperand(0); + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + return true; + } + + if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || + cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) + return false; + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType()); + + ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); + ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); + if (COp0 && COp0->isNullValue()) + Op = Op1; + else if (COp1 && COp1->isNullValue()) + Op = Op0; + else + return false; + + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + + if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + return false; + + return true; +} + +SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (zext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + // fold (zext (zext x)) -> (zext x) + // fold (zext (aext x)) -> (zext x) + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)); + + // fold (zext (truncate x)) -> (zext x) or + // (zext (truncate x)) -> (truncate x) + // This is valid when the truncated bits of x are already zero. + // FIXME: We should extend this to work for vectors too. + SDValue Op; + APInt KnownZero; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + APInt TruncatedBits = + (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? + APInt(Op.getValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); + if (TruncatedBits == (KnownZero & TruncatedBits)) { + if (VT.bitsGT(Op.getValueType())) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); + if (VT.bitsLT(Op.getValueType())) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + + return Op; + } + } + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (zext (truncate x)) -> (and x, mask) + if (N0.getOpcode() == ISD::TRUNCATE && + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + + SDValue Op = N0.getOperand(0); + if (Op.getValueType().bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); + } else if (Op.getValueType().bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); + } + return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), + N0.getValueType().getScalarType()); + } + + // Fold (zext (and (trunc x), cst)) -> (and x, cst), + // if either of the casts is not free. + if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + X, DAG.getConstant(Mask, VT)); + } + + // fold (zext (load x)) -> (zext (truncate (zextload x))) + // None of the supported targets knows how to perform load and vector_zext + // on vectors in one instruction. We only perform this transformation on + // scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getPointerInfo(), + N0.getValueType(), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + + // fold (zext (and/or/xor (load x), cst)) -> + // (and/or/xor (zextload x), (zext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::SEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + + // fold (zext (zextload x)) -> (zext (truncate (zextload x))) + // fold (zext ( extload x)) -> (zext (truncate (zextload x))) + if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), + ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + if (N0.getOpcode() == ISD::SETCC) { + if (!LegalOperations && VT.isVector()) { + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. + // Only do this before legalize for now. + EVT N0VT = N0.getOperand(0).getValueType(); + EVT EltVT = VT.getVectorElementType(); + SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), + DAG.getConstant(1, EltVT)); + if (VT.getSizeInBits() == N0VT.getSizeInBits()) + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &OneOps[0], OneOps.size())); + + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), + DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &OneOps[0], OneOps.size())); + } + + // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + } + + // (zext (shl (zext x), cst)) -> (shl (zext x), cst) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + N0.hasOneUse()) { + SDValue ShAmt = N0.getOperand(1); + unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); + if (N0.getOpcode() == ISD::SHL) { + SDValue InnerZExt = N0.getOperand(0); + // If the original shl may be shifting out bits, do not perform this + // transformation. + unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - + InnerZExt.getOperand(0).getValueType().getSizeInBits(); + if (ShAmtVal > KnownZeroBits) + return SDValue(); + } + + DebugLoc DL = N->getDebugLoc(); + + // Ensure that the shift amount is wide enough for the shifted value. + if (VT.getSizeInBits() >= 256) + ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); + + return DAG.getNode(N0.getOpcode(), DL, VT, + DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), + ShAmt); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + if (N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); + + // fold (aext (truncate (load x))) -> (aext (smaller load x)) + // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + + // fold (aext (truncate x)) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue TruncOp = N0.getOperand(0); + if (TruncOp.getValueType() == VT) + return TruncOp; // x iff x size == zext size. + if (TruncOp.getValueType().bitsGT(VT)) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); + } + + // Fold (aext (and (trunc x), cst)) -> (and x, cst) + // if the trunc is not free. + if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType())) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + X, DAG.getConstant(Mask, VT)); + } + + // fold (aext (load x)) -> (aext (truncate (extload x))) + // None of the supported targets knows how to perform load and any_ext + // on vectors in one instruction. We only perform this transformation on + // scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getPointerInfo(), + N0.getValueType(), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ANY_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (aext (zextload x)) -> (aext (truncate (zextload x))) + // fold (aext (sextload x)) -> (aext (truncate (sextload x))) + // fold (aext ( extload x)) -> (aext (truncate (extload x))) + if (N0.getOpcode() == ISD::LOAD && + !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + if (N0.getOpcode() == ISD::SETCC) { + // aext(setcc) -> sext_in_reg(vsetcc) for vectors. + // Only do this before legalize for now. 
+    if (VT.isVector() && !LegalOperations) {
+      EVT N0VT = N0.getOperand(0).getValueType();
+      // We know that the # elements of the results is the same as the
+      // # elements of the compare (and the # elements of the compare result
+      // for that matter).  Check to see that they are the same size.  If so,
+      // we know that the element size of the sext'd result matches the
+      // element size of the compare operands.
+      if (VT.getSizeInBits() == N0VT.getSizeInBits())
+        return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+                            N0.getOperand(1),
+                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
+      // If the desired elements are smaller or larger than the source
+      // elements we can use a matching integer vector type and then
+      // truncate/sign extend
+      else {
+        EVT MatchingElementType =
+          EVT::getIntegerVT(*DAG.getContext(),
+                            N0VT.getScalarType().getSizeInBits());
+        EVT MatchingVectorType =
+          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+                           N0VT.getVectorNumElements());
+        SDValue VsetCC =
+          DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+                       N0.getOperand(1),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
+        return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+      }
+    }
+
+    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+    SDValue SCC =
+      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+    if (SCC.getNode())
+      return SCC;
+  }
+
+  return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used.  If so, return the
+/// simpler operand, otherwise return a null SDValue.
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+  switch (V.getOpcode()) {
+  default: break;
+  case ISD::Constant: {
+    const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+    assert(CV != 0 && "Const value should be ConstSDNode.");
+    const APInt &CVal = CV->getAPIntValue();
+    APInt NewVal = CVal & Mask;
+    if (NewVal != CVal) {
+      return DAG.getConstant(NewVal, V.getValueType());
+    }
+    break;
+  }
+  case ISD::OR:
+  case ISD::XOR:
+    // If the LHS or RHS doesn't contribute bits to the or, drop it.
+    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+      return V.getOperand(1);
+    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+      return V.getOperand(0);
+    break;
+  case ISD::SRL:
+    // Only look at single-use SRLs.
+    if (!V.getNode()->hasOneUse())
+      break;
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+      // See if we can recursively simplify the LHS.
+      unsigned Amt = RHSC->getZExtValue();
+
+      // Watch out for shift count overflow though.
+      if (Amt >= Mask.getBitWidth()) break;
+      APInt NewMask = Mask << Amt;
+      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+      if (SimplifyLHS.getNode())
+        return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+                           SimplifyLHS, V.getOperand(1));
+    }
+  }
+  return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N
+/// bits and then truncated to a narrower type, where N is a multiple of the
+/// narrower type's bit width, transform it to a narrower load from
+/// address + N / (bit width of the new type).  If the result is to be
+/// extended, also fold the extension to form an extending load.
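+/// For example, on a little-endian target,
+///   (truncate i16 (srl (load i32 p), 16)) -> (load i16 (add p, 2))
+/// and (sext_inreg (load i32 p), i8) -> (sextload i8 p).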
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+  unsigned Opc = N->getOpcode();
+
+  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT ExtVT = VT;
+
+  // This transformation isn't valid for vector loads.
+  if (VT.isVector())
+    return SDValue();
+
+  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+  // extending to VT.
+  if (Opc == ISD::SIGN_EXTEND_INREG) {
+    ExtType = ISD::SEXTLOAD;
+    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  } else if (Opc == ISD::SRL) {
+    // Another special-case: SRL is basically zero-extending a narrower value.
+    ExtType = ISD::ZEXTLOAD;
+    N0 = SDValue(N, 0);
+    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (!N01) return SDValue();
+    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+                              VT.getSizeInBits() - N01->getZExtValue());
+  }
+  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+    return SDValue();
+
+  unsigned EVTBits = ExtVT.getSizeInBits();
+
+  // Do not generate loads of non-round integer types since these can
+  // be expensive (and would be wrong if the type is not byte sized).
+  if (!ExtVT.isRound())
+    return SDValue();
+
+  unsigned ShAmt = 0;
+  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      ShAmt = N01->getZExtValue();
+      // Is the shift amount a multiple of the size of ExtVT?
+      if ((ShAmt & (EVTBits-1)) == 0) {
+        N0 = N0.getOperand(0);
+        // Is the load width a multiple of the size of ExtVT?
+        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+          return SDValue();
+      }
+
+      // At this point, we must have a load or else we can't do the transform.
+      if (!isa<LoadSDNode>(N0)) return SDValue();
+
+      // Because a SRL must be assumed to *need* to zero-extend the high bits
+      // (as opposed to anyext the high bits), we can't combine the zextload
+      // lowering of SRL and an sextload.
+      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+        return SDValue();
+
+      // If the shift amount is larger than the input type then we're not
+      // accessing any of the loaded bytes.  If the load was a zextload/extload
+      // then the result of the shift+trunc is zero/undef (handled elsewhere).
+      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+        return SDValue();
+    }
+  }
+
+  // If the load is shifted left (and the result isn't shifted back right),
+  // we can fold the truncate through the shift.
+  unsigned ShLeftAmt = 0;
+  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      ShLeftAmt = N01->getZExtValue();
+      N0 = N0.getOperand(0);
+    }
+  }
+
+  // If we haven't found a load, we can't narrow it.  Don't transform one with
+  // multiple uses, this would require adding a new load.
+  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
+    return SDValue();
+
+  // Don't change the width of a volatile load.
+  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+  if (LN0->isVolatile())
+    return SDValue();
+
+  // Verify that we are actually reducing a load width here.
+  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
+    return SDValue();
+
+  // For the transform to be legal, the load must produce only two values
+  // (the value loaded and the chain).  Don't transform a pre-increment
+  // load, for example, which produces an extra value.
Otherwise the + // transformation is not equivalent, and the downstream logic to replace + // uses gets things wrong. + if (LN0->getNumValues() > 2) + return SDValue(); + + EVT PtrType = N0.getOperand(1).getValueType(); + + if (PtrType == MVT::Untyped || PtrType.isExtended()) + // It's not possible to generate a constant of extended or untyped type. + return SDValue(); + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (TLI.isBigEndian()) { + unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); + ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; + } + + uint64_t PtrOff = ShAmt / 8; + unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), + PtrType, LN0->getBasePtr(), + DAG.getConstant(PtrOff, PtrType)); + AddToWorkList(NewPtr.getNode()); + + SDValue Load; + if (ExtType == ISD::NON_EXTLOAD) + Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), NewAlign); + else + Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + NewAlign); + + // Replace the old load's chain with the new load's chain. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); + + // Shift the result left, if we've swallowed a left shift. + SDValue Result = Load; + if (ShLeftAmt != 0) { + EVT ShImmTy = getShiftAmountTy(Result.getValueType()); + if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) + ShImmTy = VT; + // If the shift amount is as large as the result size (but, presumably, + // no larger than the source) then the useful bits of the result are + // zero; we can't simply return the shortened shift, because the result + // of that operation is undefined. + if (ShLeftAmt >= VT.getSizeInBits()) + Result = DAG.getConstant(0, VT); + else + Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, + Result, DAG.getConstant(ShLeftAmt, ShImmTy)); + } + + // Return the new loaded value. + return Result; +} + +SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N1)->getVT(); + unsigned VTBits = VT.getScalarType().getSizeInBits(); + unsigned EVTBits = EVT.getScalarType().getSizeInBits(); + + // fold (sext_in_reg c1) -> c1 + if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); + + // If the input is already sign extended, just drop the extension. + if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) + return N0; + + // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + N0.getOperand(0), N1); + } + + // fold (sext_in_reg (sext x)) -> (sext x) + // fold (sext_in_reg (aext x)) -> (sext x) + // if x is small enough. 
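+  // Here "small enough" means the extension's source already fits in EVTBits,
+  // so sign-extending it directly yields the same value as the sext_in_reg.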
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && + (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); + } + + // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. + if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) + return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT); + + // fold operands of sext_in_reg based on knowledge that the top bits are not + // demanded. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (sext_in_reg (load x)) -> (smaller sextload x) + // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) + SDValue NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.getNode()) + return NarrowLoad; + + // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) + // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. + // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. + if (N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ShAmt->getZExtValue()+EVTBits <= VTBits) { + // We can turn this into an SRA iff the input to the SRL is already sign + // extended enough. + unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); + if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) + return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1)); + } + } + + // fold (sext_inreg (extload x)) -> (sextload x) + if (ISD::isEXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getPointerInfo(), + EVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + AddToWorkList(ExtLoad.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use + if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse() && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getPointerInfo(), + EVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+  }
+
+  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+                                       N0.getOperand(1), false);
+    if (BSwap.getNode() != 0)
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+                         BSwap, N1);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  bool isLE = TLI.isLittleEndian();
+
+  // noop truncate
+  if (N0.getValueType() == N->getValueType(0))
+    return N0;
+  // fold (truncate c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+  // fold (truncate (truncate x)) -> (truncate x)
+  if (N0.getOpcode() == ISD::TRUNCATE)
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND ||
+      N0.getOpcode() == ISD::ANY_EXTEND) {
+    if (N0.getOperand(0).getValueType().bitsLT(VT))
+      // if the source is smaller than the dest, we still need an extend
+      return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+                         N0.getOperand(0));
+    if (N0.getOperand(0).getValueType().bitsGT(VT))
+      // if the source is larger than the dest, then we just need the truncate
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+    // if the source and dest are the same type, we can drop both the extend
+    // and the truncate.
+    return N0.getOperand(0);
+  }
+
+  // Fold extract-and-trunc into a narrow extract. For example:
+  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+  //   i32 y = TRUNCATE(i64 x)
+  //        -- becomes --
+  //   v16i8 b = BITCAST (v2i64 val)
+  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+  //
+  // Note: We only run this optimization after type legalization (which often
+  // creates this pattern) and before operation legalization, after which
+  // we need to be more careful about the vector instructions that we generate.
+  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      LegalTypes && !LegalOperations && N0->hasOneUse()) {
+
+    EVT VecTy = N0.getOperand(0).getValueType();
+    EVT ExTy = N0.getValueType();
+    EVT TrTy = N->getValueType(0);
+
+    unsigned NumElem = VecTy.getVectorNumElements();
+    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+    SDValue EltNo = N0->getOperand(1);
+    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+      EVT IndexTy = N0->getOperand(1).getValueType();
+      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+      SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+                              NVT, N0.getOperand(0));
+
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+                         N->getDebugLoc(), TrTy, V,
+                         DAG.getConstant(Index, IndexTy));
+    }
+  }
+
+  // Fold a series of buildvector, bitcast, and truncate if possible.
+  // For example, fold
+  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+  //   (2xi32 (buildvector x, y)).
+  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+      N0.getOperand(0).hasOneUse()) {
+
+    SDValue BuildVect = N0.getOperand(0);
+    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+    EVT TruncVecEltTy = VT.getVectorElementType();
+
+    // Check that the element types match.
+    if (BuildVectEltTy == TruncVecEltTy) {
+      // Now we only need to compute the offset of the truncated elements.
+      unsigned BuildVecNumElts = BuildVect.getNumOperands();
+      unsigned TruncVecNumElts = VT.getVectorNumElements();
+      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+             "Invalid number of elements");
+
+      SmallVector<SDValue, 8> Opnds;
+      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+        Opnds.push_back(BuildVect.getOperand(i));
+
+      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0],
+                         Opnds.size());
+    }
+  }
+
+  // See if we can simplify the input to this truncate through knowledge that
+  // only the low bits are being used.
+  // For example "trunc (or (shl x, 8), y)" -> trunc y
+  // Currently we only perform this optimization on scalars because vectors
+  // may have different active low bits.
+  if (!VT.isVector()) {
+    SDValue Shorter =
+      GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+                                               VT.getSizeInBits()));
+    if (Shorter.getNode())
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+  }
+  // fold (truncate (load x)) -> (smaller load x)
+  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+    SDValue Reduced = ReduceLoadWidth(N);
+    if (Reduced.getNode())
+      return Reduced;
+  }
+  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
+  // where ... are all 'undef'.
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+    SmallVector<EVT, 8> VTs;
+    SDValue V;
+    unsigned Idx = 0;
+    unsigned NumDefs = 0;
+
+    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+      SDValue X = N0.getOperand(i);
+      if (X.getOpcode() != ISD::UNDEF) {
+        V = X;
+        Idx = i;
+        NumDefs++;
+      }
+      // Stop if more than one member is non-undef.
+      if (NumDefs > 1)
+        break;
+      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+                                     VT.getVectorElementType(),
+                                     X.getValueType().getVectorNumElements()));
+    }
+
+    if (NumDefs == 0)
+      return DAG.getUNDEF(VT);
+
+    if (NumDefs == 1) {
+      assert(V.getNode() && "The single defined operand is empty!");
+      SmallVector<SDValue, 8> Opnds;
+      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+        if (i != Idx) {
+          Opnds.push_back(DAG.getUNDEF(VTs[i]));
+          continue;
+        }
+        SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V);
+        AddToWorkList(NV.getNode());
+        Opnds.push_back(NV);
+      }
+      return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+                         &Opnds[0], Opnds.size());
+    }
+  }
+
+  // Simplify the operands using demanded-bits information.
+  if (!VT.isVector() &&
+      SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+  SDValue Elt = N->getOperand(i);
+  if (Elt.getOpcode() != ISD::MERGE_VALUES)
+    return Elt.getNode();
+  return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
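+/// For example, (i64 (build_pair (i32 (load p)), (i32 (load p+4)))) can become
+/// a single i64 load from p on a little-endian target, alignment permitting.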
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { + assert(N->getOpcode() == ISD::BUILD_PAIR); + + LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); + LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || + LD1->getPointerInfo().getAddrSpace() != + LD2->getPointerInfo().getAddrSpace()) + return SDValue(); + EVT LD1VT = LD1->getValueType(0); + + if (ISD::isNON_EXTLoad(LD2) && + LD2->hasOneUse() && + // If both are volatile this would reduce the number of volatile loads. + // If one is volatile it might be ok, but play conservative and bail out. + !LD1->isVolatile() && + !LD2->isVolatile() && + DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { + unsigned Align = LD1->getAlignment(); + unsigned NewAlign = TLI.getDataLayout()-> + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign <= Align && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) + return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), + LD1->getBasePtr(), LD1->getPointerInfo(), + false, false, false, Align); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBITCAST(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // If the input is a BUILD_VECTOR with all constant elements, fold this now. + // Only do this before legalize, since afterward the target may be depending + // on the bitconvert. + // First check to see if this is all constant. + if (!LegalTypes && + N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && + VT.isVector()) { + bool isSimple = true; + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) + if (N0.getOperand(i).getOpcode() != ISD::UNDEF && + N0.getOperand(i).getOpcode() != ISD::Constant && + N0.getOperand(i).getOpcode() != ISD::ConstantFP) { + isSimple = false; + break; + } + + EVT DestEltVT = N->getValueType(0).getVectorElementType(); + assert(!DestEltVT.isVector() && + "Element type of vector ValueType must not be vector!"); + if (isSimple) + return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); + } + + // If the input is a constant, let getNode fold it. + if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { + SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0); + if (Res.getNode() != N) { + if (!LegalOperations || + TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) + return Res; + + // Folding it resulted in an illegal node, and it's too late to + // do that. Clean up the old node and forego the transformation. + // Ideally this won't happen very often, because instcombine + // and the earlier dagcombine runs (where illegal nodes are + // permitted) should have folded most of them already. + DAG.DeleteNode(Res.getNode()); + } + } + + // (conv (conv x, t1), t2) -> (conv x, t2) + if (N0.getOpcode() == ISD::BITCAST) + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, + N0.getOperand(0)); + + // fold (conv (load x)) -> (load (conv*)x) + // If the resultant load doesn't need a higher alignment than the original! + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + // Do not change the width of a volatile load. 
+ !cast<LoadSDNode>(N0)->isVolatile() && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + unsigned Align = TLI.getDataLayout()-> + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + unsigned OrigAlign = LN0->getAlignment(); + + if (Align <= OrigAlign) { + SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), + LN0->getBasePtr(), LN0->getPointerInfo(), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), OrigAlign); + AddToWorkList(N); + CombineTo(N0.getNode(), + DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + N0.getValueType(), Load), + Load.getValue(1)); + return Load; + } + } + + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) + // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // This often reduces constant pool loads. + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && + N0.getNode()->hasOneUse() && VT.isInteger() && + !VT.isVector() && !N0.getValueType().isVector()) { + SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, + N0.getOperand(0)); + AddToWorkList(NewConv.getNode()); + + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + if (N0.getOpcode() == ISD::FNEG) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + NewConv, DAG.getConstant(SignBit, VT)); + assert(N0.getOpcode() == ISD::FABS); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + NewConv, DAG.getConstant(~SignBit, VT)); + } + + // fold (bitconvert (fcopysign cst, x)) -> + // (or (and (bitconvert x), sign), (and cst, (not sign))) + // Note that we don't handle (copysign x, cst) because this can always be + // folded to an fneg or fabs. + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(0)) && + VT.isInteger() && !VT.isVector()) { + unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); + EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); + if (isTypeLegal(IntXVT)) { + SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + IntXVT, N0.getOperand(1)); + AddToWorkList(X.getNode()); + + // If X has a different width than the result/lhs, sext it or truncate it. + unsigned VTWidth = VT.getSizeInBits(); + if (OrigXWidth < VTWidth) { + X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); + AddToWorkList(X.getNode()); + } else if (OrigXWidth > VTWidth) { + // To get the sign bit in the right place, we have to shift it right + // before truncating. + X = DAG.getNode(ISD::SRL, X.getDebugLoc(), + X.getValueType(), X, + DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); + AddToWorkList(X.getNode()); + X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + AddToWorkList(X.getNode()); + } + + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, + X, DAG.getConstant(SignBit, VT)); + AddToWorkList(X.getNode()); + + SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + VT, N0.getOperand(0)); + Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, + Cst, DAG.getConstant(~SignBit, VT)); + AddToWorkList(Cst.getNode()); + + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); + } + } + + // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
+  if (N0.getOpcode() == ISD::BUILD_PAIR) {
+    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+    if (CombineLD.getNode())
+      return CombineLD;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the
+/// destination element value type.
+SDValue DAGCombiner::
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+  // If this is already the right type, we're done.
+  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+  unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+  // If this is a conversion of N elements of one type to N elements of another
+  // type, convert each element.  This handles FP<->INT cases.
+  if (SrcBitSize == DstBitSize) {
+    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+                              BV->getValueType(0).getVectorNumElements());
+
+    // Due to the FP element handling below calling this routine recursively,
+    // we can end up with a scalar-to-vector node here.
+    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
+      return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+                         DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+                                     DstEltVT, BV->getOperand(0)));
+
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+      SDValue Op = BV->getOperand(i);
+      // If the vector element type is not legal, the BUILD_VECTOR operands
+      // are promoted and implicitly truncated.  Make that explicit here.
+      if (Op.getValueType() != SrcEltVT)
+        Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+      Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+                                DstEltVT, Op));
+      AddToWorkList(Ops.back().getNode());
+    }
+    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                       &Ops[0], Ops.size());
+  }
+
+  // Otherwise, we're growing or shrinking the elements.  To avoid having to
+  // handle annoying details of growing/shrinking FP values, we convert them
+  // to int first.
+  if (SrcEltVT.isFloatingPoint()) {
+    // Convert the input float vector to an int vector where the elements are
+    // the same size.
+    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
+    SrcEltVT = IntVT;
+  }
+
+  // Now we know the input is an integer vector.  If the output is a FP type,
+  // convert to integer first, then to FP of the right size.
+  if (DstEltVT.isFloatingPoint()) {
+    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+    // Next, convert to FP elements of the same size.
+    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
+  }
+
+  // Okay, we know the src/dst types are both integers of differing widths.
+  // Handle growing first.
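+  // For example (little-endian), bitcasting <2 x i32> <0x11111111, 0x22222222>
+  // to <1 x i64> produces the constant 0x2222222211111111.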
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); + if (SrcBitSize < DstBitSize) { + unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; + i += NumInputsPerOutput) { + bool isLE = TLI.isLittleEndian(); + APInt NewBits = APInt(DstBitSize, 0); + bool EltIsUndef = true; + for (unsigned j = 0; j != NumInputsPerOutput; ++j) { + // Shift the previously computed bits over. + NewBits <<= SrcBitSize; + SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); + if (Op.getOpcode() == ISD::UNDEF) continue; + EltIsUndef = false; + + NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). + zextOrTrunc(SrcBitSize).zext(DstBitSize); + } + + if (EltIsUndef) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + else + Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); + } + + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + + // Finally, this must be the case where we are shrinking elements: each input + // turns into multiple outputs. + bool isS2V = ISD::isScalarToVector(BV); + unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + NumOutputsPerInput*BV->getNumOperands()); + SmallVector<SDValue, 8> Ops; + + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { + for (unsigned j = 0; j != NumOutputsPerInput; ++j) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + continue; + } + + APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))-> + getAPIntValue().zextOrTrunc(SrcBitSize); + + for (unsigned j = 0; j != NumOutputsPerInput; ++j) { + APInt ThisVal = OpVal.trunc(DstBitSize); + Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); + if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) + // Simply turn this into a SCALAR_TO_VECTOR of the new type. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + Ops[0]); + OpVal = OpVal.lshr(DstBitSize); + } + + // For big endian targets, swap the order of the pieces of each element. 
+    if (TLI.isBigEndian())
+      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                     &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fadd c1, c2) -> c1 + c2
+  if (N0CFP && N1CFP)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+  // fold (fadd A, 0) -> A
+  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+      N1CFP->getValueAPF().isZero())
+    return N0;
+  // fold (fadd A, (fneg B)) -> (fsub A, B)
+  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+      isNegatibleForFree(N1, LegalOperations, TLI,
+                         &DAG.getTarget().Options) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+                       GetNegatedExpression(N1, DAG, LegalOperations));
+  // fold (fadd (fneg A), B) -> (fsub B, A)
+  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+      isNegatibleForFree(N0, LegalOperations, TLI,
+                         &DAG.getTarget().Options) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+                       GetNegatedExpression(N0, DAG, LegalOperations));
+
+  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+      N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+      isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  // No FP constant should be created after legalization, as the instruction
+  // selection pass has a hard time dealing with FP constants.
+  //
+  // We don't need to test this condition for transformations like the
+  // following, as the DAG being transformed implies it is legal to take an
+  // FP constant as an operand.
+  //
+  //  (fadd (fmul c, x), x) -> (fmul c+1, x)
+  //
+  bool AllowNewFpConst = (Level < AfterLegalizeDAG);
+
+  // If allowed, fold (fadd (fneg x), x) -> 0.0
+  if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
+      N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
+    return DAG.getConstantFP(0.0, VT);
+  }
+
+  // If allowed, fold (fadd x, (fneg x)) -> 0.0
+  if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
+      N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
+    return DAG.getConstantFP(0.0, VT);
+  }
+
+  // In unsafe math mode, we can fold chains of FADD's of the same value
+  // into multiplications.  This transform is not safe in general because
+  // we are reducing the number of rounding steps.
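+  // For example, (fadd (fmul c, x), x) -> (fmul c+1, x) rounds once instead
+  // of twice, so the result may differ in the last ulp.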
+ if (DAG.getTarget().Options.UnsafeFPMath && + TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && + !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + + // (fadd (fmul c, x), x) -> (fmul c+1, x) + if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fmul x, c), x) -> (fmul c+1, x) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + if (N1.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); + + // (fadd x, (fmul c, x)) -> (fmul c+1, x) + if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fmul x, c)) -> (fmul c+1, x) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + + // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) + if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) + if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { + ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + // (fadd (fadd x, x), x) -> (fmul 3.0, x) + if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + (N0.getOperand(0) == N1)) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + 
N1, DAG.getConstantFP(3.0, VT)); + } + } + + if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, DAG.getConstantFP(3.0, VT)); + } + } + + // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + if (AllowNewFpConst && + N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getConstantFP(4.0, VT)); + } + } + + // FADD -> FMA combines: + if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || + DAG.getTarget().Options.UnsafeFPMath) && + DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fsub c1, c2) -> c1-c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); + // fold (fsub A, 0) -> A + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) + return N0; + // fold (fsub 0, B) -> -B + if (DAG.getTarget().Options.UnsafeFPMath && + N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) + return GetNegatedExpression(N1, DAG, LegalOperations); + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, dl, VT, N1); + } + // fold (fsub A, (fneg B)) -> (fadd A, B) + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) + return DAG.getNode(ISD::FADD, dl, VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations)); + + // If 'unsafe math' is enabled, fold + // (fsub x, x) -> 0.0 & + // (fsub x, (fadd x, y)) -> (fneg y) & + // (fsub x, (fadd y, x)) -> (fneg y) + if (DAG.getTarget().Options.UnsafeFPMath) { + if (N0 == N1) + return DAG.getConstantFP(0.0f, VT); + + if (N1.getOpcode() == ISD::FADD) { + SDValue N10 = N1->getOperand(0); + SDValue N11 = N1->getOperand(1); + + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N11, DAG, LegalOperations); + else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N10, DAG, LegalOperations); + } + } + + // FSUB -> FMA combines: + if ((DAG.getTarget().Options.AllowFPOpFusion == 
FPOpFusion::Fast || + DAG.getTarget().Options.UnsafeFPMath) && + DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + N1.getOperand(0)), + N1.getOperand(1), N0); + } + + // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, N00), N01, + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fmul c1, c2) -> c1*c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); + // canonicalize constant to RHS + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); + // fold (fmul A, 0) -> 0 + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) + return N1; + // fold (fmul A, 0) -> 0, vector edition. + if (DAG.getTarget().Options.UnsafeFPMath && + ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; + // fold (fmul A, 1.0) -> A + if (N1CFP && N1CFP->isExactlyValue(1.0)) + return N0; + // fold (fmul X, 2.0) -> (fadd X, X) + if (N1CFP && N1CFP->isExactlyValue(+2.0)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); + // fold (fmul X, -1.0) -> (fneg X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); + + // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + &DAG.getTarget().Options)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. 
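+      // isNegatibleForFree returns 2 when the negated form can be computed
+      // more cheaply than the expression itself, and 1 when it merely costs
+      // the same.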
+ if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations)); + } + } + + // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N0.getOpcode() == ISD::FMUL && + N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), N1)); + + return SDValue(); +} + +SDValue DAGCombiner::visitFMA(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + if (DAG.getTarget().Options.UnsafeFPMath) { + if (N0CFP && N0CFP->isZero()) + return N2; + if (N1CFP && N1CFP->isZero()) + return N2; + } + if (N0CFP && N0CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); + if (N1CFP && N1CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2); + + // Canonicalize (fma c, x, y) -> (fma x, c, y) + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FMUL && + N0 == N2.getOperand(0) && + N2.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); + } + + + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (DAG.getTarget().Options.UnsafeFPMath && + N0.getOpcode() == ISD::FMUL && N1CFP && + N0.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), + N2); + } + + // (fma x, 1, y) -> (fadd x, y) + // (fma x, -1, y) -> (fadd (fneg x), y) + if (N1CFP) { + if (N1CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + + if (N1CFP->isExactlyValue(-1.0) && + (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { + SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); + AddToWorkList(RHSNeg.getNode()); + return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + } + } + + // (fma x, c, x) -> (fmul x, (c+1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, VT))); + } + + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, VT))); + } + + + return SDValue(); +} + +SDValue DAGCombiner::visitFDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fdiv c1, c2) -> c1/c2 + if (N0CFP && N1CFP) 
+ return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + DAG.getConstantFP(Recip, VT)); + } + + // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + &DAG.getTarget().Options)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. + if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + + // fold (frem c1, c2) -> fmod(c1,c2) + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); + + return SDValue(); +} + +SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + + if (N0CFP && N1CFP) // Constant fold + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); + + if (N1CFP) { + const APFloat& V = N1CFP->getValueAPF(); + // copysign(x, c1) -> fabs(x) iff ispos(c1) + // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) + if (!V.isNegative()) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + } else { + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0)); + } + } + + // copysign(fabs(x), y) -> copysign(x, y) + // copysign(fneg(x), y) -> copysign(x, y) + // copysign(copysign(x,z), y) -> copysign(x, y) + if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || + N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0.getOperand(0), N1); + + // copysign(x, abs(y)) -> abs(x) + if (N1.getOpcode() == ISD::FABS) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + + // copysign(x, copysign(y,z)) -> copysign(x, z) + if (N1.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0, N1.getOperand(1)); + + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> 
copysign(x, y)
+  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       N0, N1.getOperand(0));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  EVT VT = N->getValueType(0);
+  EVT OpVT = N0.getValueType();
+
+  // fold (sint_to_fp c1) -> c1fp
+  if (N0C &&
+      // ...but only if the target supports immediate floating-point values
+      (!LegalOperations ||
+       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+    return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+  // but UINT_TO_FP is legal on this target, try to convert.
+  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
+    if (DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+  }
+
+  // The next optimizations are desirable only if SELECT_CC can be lowered.
+  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+  // having to say they don't support SELECT_CC on every type the DAG knows
+  // about, since there is no way to mark an opcode illegal at all value types
+  // (See also visitSELECT)
+  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
+    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+        !VT.isVector() &&
+        (!LegalOperations ||
+         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+      SDValue Ops[] =
+        { N0.getOperand(0), N0.getOperand(1),
+          DAG.getConstantFP(-1.0, VT), DAG.getConstantFP(0.0, VT),
+          N0.getOperand(2) };
+      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+    }
+
+    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+    //      (select_cc x, y, 1.0, 0.0, cc)
+    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+        N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
+        (!LegalOperations ||
+         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+      SDValue Ops[] =
+        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+          DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+          N0.getOperand(0).getOperand(2) };
+      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  EVT VT = N->getValueType(0);
+  EVT OpVT = N0.getValueType();
+
+  // fold (uint_to_fp c1) -> c1fp
+  if (N0C &&
+      // ...but only if the target supports immediate floating-point values
+      (!LegalOperations ||
+       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+    return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+  // but SINT_TO_FP is legal on this target, try to convert.
+  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+    if (DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+  }
+
+  // The next optimizations are desirable only if SELECT_CC can be lowered.
+  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+  // having to say they don't support SELECT_CC on every type the DAG knows
+  // about, since there is no way to mark an opcode illegal at all value types
+  // (See also visitSELECT)
+  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
+    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+        (!LegalOperations ||
+         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+      SDValue Ops[] =
+        { N0.getOperand(0), N0.getOperand(1),
+          DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+          N0.getOperand(2) };
+      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_to_sint c1fp) -> c1
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_to_uint c1fp) -> c1
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_round c1fp) -> c1fp
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+  // fold (fp_round (fp_extend x)) -> x
+  if (N0.getOpcode() == ISD::FP_EXTEND &&
+      VT == N0.getOperand(0).getValueType())
+    return N0.getOperand(0);
+
+  // fold (fp_round (fp_round x)) -> (fp_round x)
+  if (N0.getOpcode() == ISD::FP_ROUND) {
+    // This is a value-preserving truncation if both rounds are.
+    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+                   N0.getNode()->getConstantOperandVal(1) == 1;
+    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getIntPtrConstant(IsTrunc));
+  }
+
+  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+                              N0.getOperand(0), N1);
+    AddToWorkList(Tmp.getNode());
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       Tmp, N0.getOperand(1));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+  // fold (fp_round_inreg c1fp) -> c1fp
+  if (N0CFP && isTypeLegal(EVT)) {
+    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // If this fp_extend's only use is an fp_round, don't fold it; allow the
+  // fp_round to fold us instead.
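+  // Folding (fp_round (fp_extend x)) -> x in visitFP_ROUND recovers the
+  // original value exactly when the types match, so deferring to it is safe.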
+ if (N->hasOneUse() && + N->use_begin()->getOpcode() == ISD::FP_ROUND) + return SDValue(); + + // fold (fp_extend c1fp) -> c1fp + if (N0CFP) + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); + + // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the + // value of X. + if (N0.getOpcode() == ISD::FP_ROUND + && N0.getNode()->getConstantOperandVal(1) == 1) { + SDValue In = N0.getOperand(0); + if (In.getValueType() == VT) return In; + if (VT.bitsLT(In.getValueType())) + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, + In, N0.getOperand(1)); + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); + } + + // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getPointerInfo(), + N0.getValueType(), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), + N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFNEG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVUnaryOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options)) + return GetNegatedExpression(N0, DAG, LegalOperations); + + // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading + // constant pool values. 
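+  // For example, an f64 fneg becomes an i64 xor with 0x8000000000000000,
+  // which flips only the sign bit of the bit pattern.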
+ if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && + !VT.isVector() && + N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger()) { + SDValue Int = N0.getOperand(0); + EVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, + DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + AddToWorkList(Int.getNode()); + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + VT, Int); + } + } + + // (fneg (fmul c, x)) -> (fmul -c, x) + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + if (CFP1) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + N0.getOperand(1))); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (N0CFP) + return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (N0CFP) + return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (N0CFP) + return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFABS(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVUnaryOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fabs c1) -> fabs(c1) + if (N0CFP) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + // fold (fabs (fabs x)) -> (fabs x) + if (N0.getOpcode() == ISD::FABS) + return N->getOperand(0); + // fold (fabs (fneg x)) -> (fabs x) + // fold (fabs (fcopysign x, y)) -> (fabs x) + if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); + + // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading + // constant pool values. + if (!TLI.isFAbsFree(VT) && + N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger() && + !N0.getOperand(0).getValueType().isVector()) { + SDValue Int = N0.getOperand(0); + EVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, + DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + AddToWorkList(Int.getNode()); + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + N->getValueType(0), Int); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBRCOND(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. 
However that doesn't happen very often in normal code, because
+  // Instcombine/SimplifyCFG should have handled the available opportunities.
+  // If we did this folding here, it would be necessary to update the
+  // MachineBasicBlock CFG, which is awkward.
+
+  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+  // on the target.
+  if (N1.getOpcode() == ISD::SETCC &&
+      TLI.isOperationLegalOrCustom(ISD::BR_CC,
+                                   N1.getOperand(0).getValueType())) {
+    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+                       Chain, N1.getOperand(2),
+                       N1.getOperand(0), N1.getOperand(1), N2);
+  }
+
+  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+       (N1.getOperand(0).hasOneUse() &&
+        N1.getOperand(0).getOpcode() == ISD::SRL))) {
+    SDNode *Trunc = 0;
+    if (N1.getOpcode() == ISD::TRUNCATE) {
+      // Look past the truncate.
+      Trunc = N1.getNode();
+      N1 = N1.getOperand(0);
+    }
+
+    // Match this pattern so that we can generate simpler code:
+    //
+    //   %a = ...
+    //   %b = and i32 %a, 2
+    //   %c = srl i32 %b, 1
+    //   brcond i32 %c ...
+    //
+    // into
+    //
+    //   %a = ...
+    //   %b = and i32 %a, 2
+    //   %c = setcc eq %b, 0
+    //   brcond %c ...
+    //
+    // This applies only when the AND constant value has one bit set and the
+    // SRL constant is equal to the log2 of the AND constant. The back-end is
+    // smart enough to convert the result into a TEST/JMP sequence.
+    SDValue Op0 = N1.getOperand(0);
+    SDValue Op1 = N1.getOperand(1);
+
+    if (Op0.getOpcode() == ISD::AND &&
+        Op1.getOpcode() == ISD::Constant) {
+      SDValue AndOp1 = Op0.getOperand(1);
+
+      if (AndOp1.getOpcode() == ISD::Constant) {
+        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+        if (AndConst.isPowerOf2() &&
+            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+          SDValue SetCC =
+            DAG.getSetCC(N->getDebugLoc(),
+                         TLI.getSetCCResultType(Op0.getValueType()),
+                         Op0, DAG.getConstant(0, Op0.getValueType()),
+                         ISD::SETNE);
+
+          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                                          MVT::Other, Chain, SetCC, N2);
+          // Don't add the new BRCond into the worklist or else SimplifySelectCC
+          // will convert it back to (X & C1) >> C2.
+          CombineTo(N, NewBRCond, false);
+          // Truncate is dead.
+          if (Trunc) {
+            removeFromWorkList(Trunc);
+            DAG.DeleteNode(Trunc);
+          }
+          // Replace the uses of SRL with SETCC
+          WorkListRemover DeadNodes(*this);
+          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+          removeFromWorkList(N1.getNode());
+          DAG.DeleteNode(N1.getNode());
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        }
+      }
+    }
+
+    if (Trunc)
+      // Restore N1 if the above transformation doesn't match.
+      N1 = N->getOperand(1);
+  }
+
+  // Transform br(xor(x, y)) -> br(x != y)
+  // Transform br(xor(xor(x, y), 1)) -> br(x == y)
+  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+    SDNode *TheXor = N1.getNode();
+    SDValue Op0 = TheXor->getOperand(0);
+    SDValue Op1 = TheXor->getOperand(1);
+    if (Op0.getOpcode() == Op1.getOpcode()) {
+      // Avoid missing important xor optimizations.
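+      // E.g. an (xor (setcc a, b), (setcc c, d)) may itself simplify; give
+      // visitXOR the first try before rewriting the branch condition below.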
+ SDValue Tmp = visitXOR(TheXor); + if (Tmp.getNode()) { + if (Tmp.getNode() != TheXor) { + DEBUG(dbgs() << "\nReplacing.8 "; + TheXor->dump(&DAG); + dbgs() << "\nWith: "; + Tmp.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp); + removeFromWorkList(TheXor); + DAG.DeleteNode(TheXor); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, Tmp, N2); + } + + // visitXOR has changed XOR's operands or replaced the XOR completely, + // bail out. + return SDValue(N, 0); + } + } + + if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { + bool Equal = false; + if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) + if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } + + EVT SetCCVT = N1.getValueType(); + if (LegalTypes) + SetCCVT = TLI.getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), + SetCCVT, + Op0, Op1, + Equal ? ISD::SETEQ : ISD::SETNE); + // Replace the uses of XOR with SETCC + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + } + } + + return SDValue(); +} + +// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. +// +SDValue DAGCombiner::visitBR_CC(SDNode *N) { + CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); + SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + + // Use SimplifySetCC to simplify SETCC's. + SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), N->getDebugLoc(), + false); + if (Simp.getNode()) AddToWorkList(Simp.getNode()); + + // fold to a simpler setcc + if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + N->getOperand(0), Simp.getOperand(2), + Simp.getOperand(0), Simp.getOperand(1), + N->getOperand(4)); + + return SDValue(); +} + +/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that +/// uses N as its base pointer and that N may be folded in the load / store +/// addressing mode. 
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = Use->getValueType(0); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getValue().getValueType(); + } else + return false; + + TargetLowering::AddrMode AM; + if (N->getOpcode() == ISD::ADD) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else if (N->getOpcode() == ISD::SUB) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = -Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else + return false; + + return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); +} + +/// CombineToPreIndexedLoadStore - Try turning a load / store into a +/// pre-indexed load / store when the base pointer is an add or subtract +/// and it has other uses besides the load / store. After the +/// transformation, the new indexed load / store has effectively folded +/// the add / subtract in and all of its other uses are redirected to the +/// new load / store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + bool isLoad = true; + SDValue Ptr; + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail + // out. There is no reason to make this a preinc/predec. + if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || + Ptr.getNode()->hasOneUse()) + return false; + + // Ask the target to do addressing mode selection. + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) + return false; + + // Backends without true r+i pre-indexed forms may need to pass a + // constant base with a variable offset so that constant coercion + // will work with the patterns in canonical form. + bool Swapped = false; + if (isa<ConstantSDNode>(BasePtr)) { + std::swap(BasePtr, Offset); + Swapped = true; + } + + // Don't create a indexed load / store with zero offset. + if (isa<ConstantSDNode>(Offset) && + cast<ConstantSDNode>(Offset)->isNullValue()) + return false; + + // Try turning it into a pre-indexed load / store except when: + // 1) The new base ptr is a frame index. + // 2) If N is a store and the new base ptr is either the same as or is a + // predecessor of the value being stored. + // 3) Another use of old base ptr is a predecessor of N. If ptr is folded + // that would create a cycle. + // 4) All uses are load / store ops that use it as old base ptr. + + // Check #1. 
Preinc'ing a frame index would require copying the stack pointer + // (plus the implicit offset) to a register to preinc anyway. + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + return false; + + // Check #2. + if (!isLoad) { + SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode())) + return false; + } + + // If the offset is a constant, there may be other adds of constants that + // can be folded with this one. We should do this to avoid having to keep + // a copy of the original base pointer. + SmallVector<SDNode *, 16> OtherUses; + if (isa<ConstantSDNode>(Offset)) + for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(), + E = BasePtr.getNode()->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == Ptr.getNode()) + continue; + + if (Use->isPredecessorOf(N)) + continue; + + if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) { + OtherUses.clear(); + break; + } + + SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1); + if (Op1.getNode() == BasePtr.getNode()) + std::swap(Op0, Op1); + assert(Op0.getNode() == BasePtr.getNode() && + "Use of ADD/SUB but not an operand"); + + if (!isa<ConstantSDNode>(Op1)) { + OtherUses.clear(); + break; + } + + // FIXME: In some cases, we can be smarter about this. + if (Op1.getValueType() != Offset.getValueType()) { + OtherUses.clear(); + break; + } + + OtherUses.push_back(Use); + } + + if (Swapped) + std::swap(BasePtr, Offset); + + // Now check for #3 and #4. + bool RealUse = false; + + // Caches for hasPredecessorHelper + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), + E = Ptr.getNode()->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == N) + continue; + if (N->hasPredecessorHelper(Use, Visited, Worklist)) + return false; + + // If Ptr may be folded in addressing mode of other use, then it's + // not profitable to do this transformation. + if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) + RealUse = true; + } + + if (!RealUse) + return false; + + SDValue Result; + if (isLoad) + Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + else + Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + ++PreIndexedNodes; + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.4 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); + } + + // Finally, since the node is now dead, remove it from the graph. 
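+  // (Illustrative, in pseudo-assembly: "t = add p, #4; v = ld [t]" has
+  // become "v, p' = ld.pre [p, #4]"; the remaining users of the old add are
+  // redirected to the writeback value p' below.)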
+  DAG.DeleteNode(N);
+
+  if (Swapped)
+    std::swap(BasePtr, Offset);
+
+  // Replace other uses of BasePtr that can be updated to use Ptr
+  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
+    unsigned OffsetIdx = 1;
+    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
+      OffsetIdx = 0;
+    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
+           BasePtr.getNode() && "Expected BasePtr operand");
+
+    APInt OV =
+      cast<ConstantSDNode>(Offset)->getAPIntValue();
+    if (AM == ISD::PRE_DEC)
+      OV = -OV;
+
+    ConstantSDNode *CN =
+      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
+    APInt CNV = CN->getAPIntValue();
+    if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1)
+      CNV += OV;
+    else
+      CNV -= OV;
+
+    SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0);
+    SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0));
+    if (OffsetIdx == 0)
+      std::swap(NewOp1, NewOp2);
+
+    SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(),
+                                 OtherUses[i]->getDebugLoc(),
+                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
+    removeFromWorkList(OtherUses[i]);
+    DAG.DeleteNode(OtherUses[i]);
+  }
+
+  // Replace the uses of Ptr with uses of the updated base value.
+  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
+  removeFromWorkList(Ptr.getNode());
+  DAG.DeleteNode(Ptr.getNode());
+
+  return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// The transformation effectively folds the add / subtract into the new
+/// indexed load / store, and all of its uses are redirected to the
+/// new load / store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+  if (Level < AfterLegalizeDAG)
+    return false;
+
+  bool isLoad = true;
+  SDValue Ptr;
+  EVT VT;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    if (LD->isIndexed())
+      return false;
+    VT = LD->getMemoryVT();
+    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    if (ST->isIndexed())
+      return false;
+    VT = ST->getMemoryVT();
+    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = ST->getBasePtr();
+    isLoad = false;
+  } else {
+    return false;
+  }
+
+  if (Ptr.getNode()->hasOneUse())
+    return false;
+
+  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+         E = Ptr.getNode()->use_end(); I != E; ++I) {
+    SDNode *Op = *I;
+    if (Op == N ||
+        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+      continue;
+
+    SDValue BasePtr;
+    SDValue Offset;
+    ISD::MemIndexedMode AM = ISD::UNINDEXED;
+    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+      // Don't create an indexed load / store with zero offset.
+      if (isa<ConstantSDNode>(Offset) &&
+          cast<ConstantSDNode>(Offset)->isNullValue())
+        continue;
+
+      // Try turning it into a post-indexed load / store except when
+      // 1) All uses are load / store ops that use it as base ptr (and
+      //    it may be folded as addressing mode).
+      // 2) Op must be independent of N, i.e. Op is neither a predecessor
+      //    nor a successor of N. Otherwise, if Op is folded that would
+      //    create a cycle.
+
+      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+        continue;
+
+      // Check for #1.
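+      // In other words, if every other use of the new base is itself just a
+      // load / store address, the add / sub can be folded into those
+      // accesses instead, and this candidate is skipped.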
+ bool TryNext = false; + for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(), + EE = BasePtr.getNode()->use_end(); II != EE; ++II) { + SDNode *Use = *II; + if (Use == Ptr.getNode()) + continue; + + // If all the uses are load / store addresses, then don't do the + // transformation. + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + bool RealUse = false; + for (SDNode::use_iterator III = Use->use_begin(), + EEE = Use->use_end(); III != EEE; ++III) { + SDNode *UseUse = *III; + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + RealUse = true; + } + + if (!RealUse) { + TryNext = true; + break; + } + } + } + + if (TryNext) + continue; + + // Check for #2 + if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { + SDValue Result = isLoad + ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM) + : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + ++PostIndexedNodes; + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.5 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); + } + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + + // Replace the uses of Use with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), + Result.getValue(isLoad ? 1 : 0)); + removeFromWorkList(Op); + DAG.DeleteNode(Op); + return true; + } + } + } + + return false; +} + +SDValue DAGCombiner::visitLOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + // If load is not volatile and there are no uses of the loaded value (and + // the updated indexed value in case of indexed loads), change uses of the + // chain value into uses of the chain input (i.e. delete the dead load). + if (!LD->isVolatile()) { + if (N->getValueType(1) == MVT::Other) { + // Unindexed loads. + if (!N->hasAnyUseOfValue(0)) { + // It's not safe to use the two value CombineTo variant here. e.g. + // v1, chain2 = load chain1, loc + // v2, chain3 = load chain2, loc + // v3 = add v2, c + // Now we replace use of chain2 with chain1. This makes the second load + // isomorphic to the one we are deleting, and thus makes this load live. + DEBUG(dbgs() << "\nReplacing.6 "; + N->dump(&DAG); + dbgs() << "\nWith chain: "; + Chain.getNode()->dump(&DAG); + dbgs() << "\n"); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + + if (N->use_empty()) { + removeFromWorkList(N); + DAG.DeleteNode(N); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } else { + // Indexed loads. 
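+      // An indexed load produces (value, updated base, chain); it is only
+      // dead when both the loaded value and the base writeback are unused.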
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); + if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { + SDValue Undef = DAG.getUNDEF(N->getValueType(0)); + DEBUG(dbgs() << "\nReplacing.7 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Undef.getNode()->dump(&DAG); + dbgs() << " and 2 other values\n"); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), + DAG.getUNDEF(N->getValueType(1))); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); + removeFromWorkList(N); + DAG.DeleteNode(N); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + + // If this load is directly stored, replace the load value with the stored + // value. + // TODO: Handle store large -> read small portion. + // TODO: Handle TRUNCSTORE/LOADEXT + if (ISD::isNormalLoad(N) && !LD->isVolatile()) { + if (ISD::isNON_TRUNCStore(Chain.getNode())) { + StoreSDNode *PrevST = cast<StoreSDNode>(Chain); + if (PrevST->getBasePtr() == Ptr && + PrevST->getValue().getValueType() == N->getValueType(0)) + return CombineTo(N, Chain.getOperand(1), Chain); + } + } + + // Try to infer better alignment information than the load already has. + if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { + if (Align > LD->getMemOperand()->getBaseAlignment()) { + SDValue NewLoad = + DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + LD->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), + LD->getMemoryVT(), + LD->isVolatile(), LD->isNonTemporal(), Align); + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + } + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + SDValue ReplLoad; + + // Replace the chain to void dependency. + if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), + BetterChain, Ptr, LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + } else { + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + LD->getValueType(0), + BetterChain, Ptr, LD->getPointerInfo(), + LD->getMemoryVT(), + LD->isVolatile(), + LD->isNonTemporal(), + LD->getAlignment()); + } + + // Create token factor to keep old chain connected. + SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, Chain, ReplLoad.getValue(1)); + + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + + // Replace uses with load result and token factor. Don't add users + // to work list. + return CombineTo(N, ReplLoad.getValue(0), Token, false); + } + } + + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + return SDValue(); +} + +/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the +/// load is having specific bytes cleared out. If so, return the byte size +/// being masked out and the shift amount. +static std::pair<unsigned, unsigned> +CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { + std::pair<unsigned, unsigned> Result(0, 0); + + // Check for the structure we're looking for. 
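+  // Illustrative i32 instance: V = (and (load p), 0xFFFF00FF). The inverted
+  // mask is 0x0000FF00, a single byte-aligned run of bits, so the result
+  // would be (1 byte, offset 1): only byte 1 of the value is replaced.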
+ if (V->getOpcode() != ISD::AND || + !isa<ConstantSDNode>(V->getOperand(1)) || + !ISD::isNormalLoad(V->getOperand(0).getNode())) + return Result; + + // Check the chain and pointer. + LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); + if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. + + // The store should be chained directly to the load or be an operand of a + // tokenfactor. + if (LD == Chain.getNode()) + ; // ok. + else if (Chain->getOpcode() != ISD::TokenFactor) + return Result; // Fail. + else { + bool isOk = false; + for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) + if (Chain->getOperand(i).getNode() == LD) { + isOk = true; + break; + } + if (!isOk) return Result; + } + + // This only handles simple types. + if (V.getValueType() != MVT::i16 && + V.getValueType() != MVT::i32 && + V.getValueType() != MVT::i64) + return Result; + + // Check the constant mask. Invert it so that the bits being masked out are + // 0 and the bits being kept are 1. Use getSExtValue so that leading bits + // follow the sign bit for uniformity. + uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); + unsigned NotMaskLZ = CountLeadingZeros_64(NotMask); + if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. + unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); + if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. + if (NotMaskLZ == 64) return Result; // All zero mask. + + // See if we have a continuous run of bits. If so, we have 0*1+0* + if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) + return Result; + + // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. + if (V.getValueType() != MVT::i64 && NotMaskLZ) + NotMaskLZ -= 64-V.getValueSizeInBits(); + + unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; + switch (MaskedBytes) { + case 1: + case 2: + case 4: break; + default: return Result; // All one mask, or 5-byte mask. + } + + // Verify that the first bit starts at a multiple of mask so that the access + // is aligned the same as the access width. + if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; + + Result.first = MaskedBytes; + Result.second = NotMaskTZ/8; + return Result; +} + + +/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that +/// provides a value as specified by MaskInfo. If so, replace the specified +/// store with a narrower store of truncated IVal. +static SDNode * +ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, + SDValue IVal, StoreSDNode *St, + DAGCombiner *DC) { + unsigned NumBytes = MaskInfo.first; + unsigned ByteShift = MaskInfo.second; + SelectionDAG &DAG = DC->getDAG(); + + // Check to see if IVal is all zeros in the part being masked in by the 'or' + // that uses this. If not, this is not a replacement. + APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), + ByteShift*8, (ByteShift+NumBytes)*8); + if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; + + // Check that it is legal on the target to do this. It is legal if the new + // VT we're shrinking to (i8/i16/i32) is legal or we're still before type + // legalization. + MVT VT = MVT::getIntegerVT(NumBytes*8); + if (!DC->isTypeLegal(VT)) + return 0; + + // Okay, we can do this! Replace the 'St' store with a store of IVal that is + // shifted by ByteShift and truncated down to NumBytes. 
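+  // Continuing the illustration above (1 masked byte at byte offset 1):
+  // IVal is shifted right by 8, truncated to i8, and stored at Ptr + 1 on a
+  // little-endian target; big-endian recomputes the offset below.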
+ if (ByteShift) + IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, + DC->getShiftAmountTy(IVal.getValueType()))); + + // Figure out the offset for the store and the alignment of the access. + unsigned StOffset; + unsigned NewAlign = St->getAlignment(); + + if (DAG.getTargetLoweringInfo().isLittleEndian()) + StOffset = ByteShift; + else + StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; + + SDValue Ptr = St->getBasePtr(); + if (StOffset) { + Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); + NewAlign = MinAlign(NewAlign, StOffset); + } + + // Truncate down to the new size. + IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); + + ++OpsNarrowed; + return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), + false, false, NewAlign).getNode(); +} + + +/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is +/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some +/// of the loaded bits, try narrowing the load and store if it would end up +/// being a win for performance or code size. +SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + if (ST->isVolatile()) + return SDValue(); + + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + EVT VT = Value.getValueType(); + + if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) + return SDValue(); + + unsigned Opc = Value.getOpcode(); + + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst + // is a byte mask indicating a consecutive number of bytes, check to see if + // Y is known to provide just those bytes. If so, we try to replace the + // load + replace + store sequence with a single (narrower) store, which makes + // the load dead. + if (Opc == ISD::OR) { + std::pair<unsigned, unsigned> MaskedLoad; + MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(1), ST,this)) + return SDValue(NewST, 0); + + // Or is commutative, so try swapping X and Y. + MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(0), ST,this)) + return SDValue(NewST, 0); + } + + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || + Value.getOperand(1).getOpcode() != ISD::Constant) + return SDValue(); + + SDValue N0 = Value.getOperand(0); + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + Chain == SDValue(N0.getNode(), 1)) { + LoadSDNode *LD = cast<LoadSDNode>(N0); + if (LD->getBasePtr() != Ptr || + LD->getPointerInfo().getAddrSpace() != + ST->getPointerInfo().getAddrSpace()) + return SDValue(); + + // Find the type to narrow it the load / op / store to. 
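+    // Worked instance (illustrative, little-endian): for an i32 OR with
+    // Imm = 0x00FF0000, ShAmt = 16 and MSB = 23, so NewBW = 8 and the
+    // load / or / store is retried as an i8 access at byte offset 2.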
+ SDValue N1 = Value.getOperand(1); + unsigned BitWidth = N1.getValueSizeInBits(); + APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); + if (Opc == ISD::AND) + Imm ^= APInt::getAllOnesValue(BitWidth); + if (Imm == 0 || Imm.isAllOnesValue()) + return SDValue(); + unsigned ShAmt = Imm.countTrailingZeros(); + unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; + unsigned NewBW = NextPowerOf2(MSB - ShAmt); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); + while (NewBW < BitWidth && + !(TLI.isOperationLegalOrCustom(Opc, NewVT) && + TLI.isNarrowingProfitable(VT, NewVT))) { + NewBW = NextPowerOf2(NewBW); + NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); + } + if (NewBW >= BitWidth) + return SDValue(); + + // If the lsb changed does not start at the type bitwidth boundary, + // start at the previous one. + if (ShAmt % NewBW) + ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; + APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, + std::min(BitWidth, ShAmt + NewBW)); + if ((Imm & Mask) == Imm) { + APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); + if (Opc == ISD::AND) + NewImm ^= APInt::getAllOnesValue(NewBW); + uint64_t PtrOff = ShAmt / 8; + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (TLI.isBigEndian()) + PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + + unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); + Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); + if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) + return SDValue(); + + SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), + Ptr.getValueType(), Ptr, + DAG.getConstant(PtrOff, Ptr.getValueType())); + SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), + LD->getChain(), NewPtr, + LD->getPointerInfo().getWithOffset(PtrOff), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), NewAlign); + SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, + DAG.getConstant(NewImm, NewVT)); + SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), + NewVal, NewPtr, + ST->getPointerInfo().getWithOffset(PtrOff), + false, false, NewAlign); + + AddToWorkList(NewPtr.getNode()); + AddToWorkList(NewLD.getNode()); + AddToWorkList(NewVal.getNode()); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); + ++OpsNarrowed; + return NewST; + } + } + + return SDValue(); +} + +/// TransformFPLoadStorePair - For a given floating point load / store pair, +/// if the load value isn't used by any other operations, then consider +/// transforming the pair to integer load / store operations if the target +/// deems the transformation profitable. 
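+/// E.g. a pure f64 memory-to-memory copy (store (load p)) can be performed
+/// as an i64 load / store when the target finds that desirable, keeping the
+/// value out of the FP register file (illustrative).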
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && + Value.hasOneUse() && + Chain == SDValue(Value.getNode(), 1)) { + LoadSDNode *LD = cast<LoadSDNode>(Value); + EVT VT = LD->getMemoryVT(); + if (!VT.isFloatingPoint() || + VT != ST->getMemoryVT() || + LD->isNonTemporal() || + ST->isNonTemporal() || + LD->getPointerInfo().getAddrSpace() != 0 || + ST->getPointerInfo().getAddrSpace() != 0) + return SDValue(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || + !TLI.isOperationLegal(ISD::STORE, IntVT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) + return SDValue(); + + unsigned LDAlign = LD->getAlignment(); + unsigned STAlign = ST->getAlignment(); + Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); + if (LDAlign < ABIAlign || STAlign < ABIAlign) + return SDValue(); + + SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), + LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + false, false, false, LDAlign); + + SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), + NewLD, ST->getBasePtr(), + ST->getPointerInfo(), + false, false, STAlign); + + AddToWorkList(NewLD.getNode()); + AddToWorkList(NewST.getNode()); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); + ++LdStFP2Int; + return NewST; + } + + return SDValue(); +} + +/// Helper struct to parse and store a memory address as base + index + offset. +/// We ignore sign extensions when it is safe to do so. +/// The following two expressions are not equivalent. To differentiate we need +/// to store whether there was a sign extension involved in the index +/// computation. +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (add (i8 load %index) +/// (i8 1)))) +/// vs +/// +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) +/// (i32 1))))) +struct BaseIndexOffset { + SDValue Base; + SDValue Index; + int64_t Offset; + bool IsIndexSignExt; + + BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} + + BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, + bool IsIndexSignExt) : + Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} + + bool equalBaseIndex(const BaseIndexOffset &Other) { + return Other.Base == Base && Other.Index == Index && + Other.IsIndexSignExt == IsIndexSignExt; + } + + /// Parses tree in Ptr for base, index, offset addresses. + static BaseIndexOffset match(SDValue Ptr) { + bool IsIndexSignExt = false; + + // Just Base or possibly anything else. + if (Ptr->getOpcode() != ISD::ADD) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Base + offset. + if (isa<ConstantSDNode>(Ptr->getOperand(1))) { + int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); + return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, + IsIndexSignExt); + } + + // Look at Base + Index + Offset cases. + SDValue Base = Ptr->getOperand(0); + SDValue IndexOffset = Ptr->getOperand(1); + + // Skip signextends. 
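+    // Note: both (add %c, (sign_extend (add %i, 1))) and
+    // (add %c, (sign_extend (add (sign_extend %i), 1))) decompose to
+    // Base = %c, Index = %i, Offset = 1; only the second ends up with
+    // IsIndexSignExt set, which keeps the two addresses distinct.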
+ if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { + IndexOffset = IndexOffset->getOperand(0); + IsIndexSignExt = true; + } + + // Either the case of Base + Index (no offset) or something else. + if (IndexOffset->getOpcode() != ISD::ADD) + return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); + + // Now we have the case of Base + Index + offset. + SDValue Index = IndexOffset->getOperand(0); + SDValue Offset = IndexOffset->getOperand(1); + + if (!isa<ConstantSDNode>(Offset)) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Ignore signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else IsIndexSignExt = false; + + int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); + return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); + } +}; + +/// Holds a pointer to an LSBaseSDNode as well as information on where it +/// is located in a sequence of memory operations connected by a chain. +struct MemOpLink { + MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): + MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + // Ptr to the mem node. + LSBaseSDNode *MemNode; + // Offset from the base ptr. + int64_t OffsetFromBase; + // What is the sequence number of this mem node. + // Lowest mem operand in the DAG starts at zero. + unsigned SequenceNum; +}; + +/// Sorts store nodes in a link according to their offset from a shared +// base ptr. +struct ConsecutiveMemoryChainSorter { + bool operator()(MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase; + } +}; + +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + EVT MemVT = St->getMemoryVT(); + int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; + bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); + + // Don't merge vectors into wider inputs. + if (MemVT.isVector() || !MemVT.isSimple()) + return false; + + // Perform an early exit check. Do not bother looking at stored values that + // are not constants or loads. + SDValue StoredVal = St->getValue(); + bool IsLoadSrc = isa<LoadSDNode>(StoredVal); + if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && + !IsLoadSrc) + return false; + + // Only look at ends of store sequences. + SDValue Chain = SDValue(St, 1); + if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) + return false; + + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + + // We must have a base and an offset. + if (!BasePtr.Base.getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.Base.getOpcode() == ISD::UNDEF) + return false; + + // Save the LoadSDNodes that we find in the chain. + // We need to make sure that these nodes do not interfere with + // any of the store nodes. + SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; + + // Save the StoreSDNodes that we find in the chain. + SmallVector<MemOpLink, 8> StoreNodes; + + // Walk up the chain and look for nodes with offsets from the same + // base pointer. Stop when reaching an instruction with a different kind + // or instruction which has a different base pointer. + unsigned Seq = 0; + StoreSDNode *Index = St; + while (Index) { + // If the chain has more than one use, then we can't reorder the mem ops. 
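+    // (A second user of that chain could otherwise observe the stores in
+    // their original order.)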
+ if (Index != St && !SDValue(Index, 1)->hasOneUse()) + break; + + // Find the base pointer and offset for this memory node. + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + + // Check that the base pointer is the same as the original one. + if (!Ptr.equalBaseIndex(BasePtr)) + break; + + // Check that the alignment is the same. + if (Index->getAlignment() != St->getAlignment()) + break; + + // The memory operands must not be volatile. + if (Index->isVolatile() || Index->isIndexed()) + break; + + // No truncation. + if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) + if (St->isTruncatingStore()) + break; + + // The stored memory type must be the same. + if (Index->getMemoryVT() != MemVT) + break; + + // We do not allow unaligned stores because we want to prevent overriding + // stores. + if (Index->getAlignment()*8 != MemVT.getSizeInBits()) + break; + + // We found a potential memory operand to merge. + StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); + + // Find the next memory operand in the chain. If the next operand in the + // chain is a store then move up and continue the scan with the next + // memory operand. If the next operand is a load save it and use alias + // information to check if it interferes with anything. + SDNode *NextInChain = Index->getChain().getNode(); + while (1) { + if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { + // We found a store node. Use it for the next iteration. + Index = STn; + break; + } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + // Save the load node for later. Continue the scan. + AliasLoadNodes.push_back(Ldn); + NextInChain = Ldn->getChain().getNode(); + continue; + } else { + Index = NULL; + break; + } + } + } + + // Check if there is anything to merge. + if (StoreNodes.size() < 2) + return false; + + // Sort the memory operands according to their distance from the base pointer. + std::sort(StoreNodes.begin(), StoreNodes.end(), + ConsecutiveMemoryChainSorter()); + + // Scan the memory operations on the chain and find the first non-consecutive + // store memory address. + unsigned LastConsecutiveStore = 0; + int64_t StartAddress = StoreNodes[0].OffsetFromBase; + for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { + + // Check that the addresses are consecutive starting from the second + // element in the list of stores. + if (i > 0) { + int64_t CurrAddress = StoreNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + } + + bool Alias = false; + // Check if this store interferes with any of the loads that we found. + for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) + if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { + Alias = true; + break; + } + // We found a load that alias with this store. Stop the sequence. + if (Alias) + break; + + // Mark this node as useful. + LastConsecutiveStore = i; + } + + // The node with the lowest store address. + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + + // Store the constants into memory as one consecutive store. 
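+  // E.g. four consecutive i8 stores of 1, 2, 3, 4 can become a single i32
+  // store of 0x04030201 on a little-endian target, provided i32 is legal
+  // (illustrative; the legality and profitability checks follow).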
+ if (!IsLoadSrc) { + unsigned LastLegalType = 0; + unsigned LastLegalVectorType = 0; + bool NonZero = false; + for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = St->getValue(); + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { + NonZero |= !C->isNullValue(); + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { + NonZero |= !C->getConstantFPValue()->isNullValue(); + } else { + // Non constant. + break; + } + + // Find a legal type for the constant store. + unsigned StoreBW = (i+1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + if (TLI.isTypeLegal(StoreTy)) + LastLegalType = i+1; + // Or check whether a truncstore is legal. + else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) + LastLegalType = i+1; + } + + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(Ty)) + LastLegalVectorType = i + 1; + } + + // We only use vectors if the constant is known to be zero and the + // function is not marked with the noimplicitfloat attribute. + if (NonZero || NoVectors) + LastLegalVectorType = 0; + + // Check if we found a legal integer type to store. + if (LastLegalType == 0 && LastLegalVectorType == 0) + return false; + + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; + unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; + + // Make sure we have something to merge. + if (NumElem < 2) + return false; + + unsigned EarliestNodeUsed = 0; + for (unsigned i=0; i < NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // earliest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) + EarliestNodeUsed = i; + } + + // The earliest Node in the DAG. + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc(); + + SDValue StoredVal; + if (UseVector) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + StoredVal = DAG.getConstant(0, Ty); + } else { + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + APInt StoreInt(StoreBW, 0); + + // Construct a single integer constant which is made of the smaller + // constant inputs. + bool IsLE = TLI.isLittleEndian(); + for (unsigned i = 0; i < NumElem ; ++i) { + unsigned Idx = IsLE ?(NumElem - 1 - i) : i; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); + SDValue Val = St->getValue(); + StoreInt<<=ElementSizeBytes*8; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { + StoreInt|=C->getAPIntValue().zext(StoreBW); + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { + StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + } else { + assert(false && "Invalid constant element type"); + } + } + + // Create the new Load and Store operations. 
+      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+      StoredVal = DAG.getConstant(StoreInt, StoreTy);
+    }
+
+    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+                                    FirstInChain->getBasePtr(),
+                                    FirstInChain->getPointerInfo(),
+                                    false, false,
+                                    FirstInChain->getAlignment());
+
+    // Replace the first store with the new store.
+    CombineTo(EarliestOp, NewStore);
+    // Erase all other stores.
+    for (unsigned i = 0; i < NumElem ; ++i) {
+      if (StoreNodes[i].MemNode == EarliestOp)
+        continue;
+      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+      // ReplaceAllUsesWith will replace all uses that existed when it was
+      // called, but graph optimizations may cause new ones to appear. For
+      // example, the case in pr14333 looks like
+      //
+      //  St's chain -> St -> another store -> X
+      //
+      // And the only difference from St to the other store is the chain.
+      // When we change its chain to be St's chain they become identical,
+      // get CSEed and the net result is that X is now a use of St.
+      // Since we know that St is redundant, just iterate.
+      while (!St->use_empty())
+        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+      removeFromWorkList(St);
+      DAG.DeleteNode(St);
+    }
+
+    return true;
+  }
+
+  // Below we handle the case of multiple consecutive stores that
+  // come from multiple consecutive loads. We merge them into a single
+  // wide load and a single wide store.
+
+  // Look for load nodes which are used by the stored values.
+  SmallVector<MemOpLink, 8> LoadNodes;
+
+  // Find acceptable loads. Loads need to have the same chain (token factor),
+  // must not be zext, volatile, indexed, and they must be consecutive.
+  BaseIndexOffset LdBasePtr;
+  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+    LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+    if (!Ld) break;
+
+    // Loads must only have one use.
+    if (!Ld->hasNUsesOfValue(1, 0))
+      break;
+
+    // Check that the alignment is the same as the stores.
+    if (Ld->getAlignment() != St->getAlignment())
+      break;
+
+    // The memory operands must not be volatile.
+    if (Ld->isVolatile() || Ld->isIndexed())
+      break;
+
+    // We do not accept ext loads.
+    if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+      break;
+
+    // The stored memory type must be the same.
+    if (Ld->getMemoryVT() != MemVT)
+      break;
+
+    BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
+    // If this is not the first ptr that we check.
+    if (LdBasePtr.Base.getNode()) {
+      // The base ptr must be the same.
+      if (!LdPtr.equalBaseIndex(LdBasePtr))
+        break;
+    } else {
+      // Check that all other base pointers are the same as this one.
+      LdBasePtr = LdPtr;
+    }
+
+    // We found a potential memory operand to merge.
+    LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
+  }
+
+  if (LoadNodes.size() < 2)
+    return false;
+
+  // Scan the memory operations on the chain and find the first non-consecutive
+  // load memory address. These variables hold the index in the store node
+  // array.
+  unsigned LastConsecutiveLoad = 0;
+  // This variable refers to the size and not index in the array.
+  unsigned LastLegalVectorType = 0;
+  unsigned LastLegalIntegerType = 0;
+  StartAddress = LoadNodes[0].OffsetFromBase;
+  SDValue FirstChain = LoadNodes[0].MemNode->getChain();
+  for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+    // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain) + break; + + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; + + // Find a legal type for the vector store. + EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(StoreTy)) + LastLegalVectorType = i + 1; + + // Find a legal type for the integer store. + unsigned StoreBW = (i+1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + if (TLI.isTypeLegal(StoreTy)) + LastLegalIntegerType = i + 1; + // Or check whether a truncstore and extload is legal. + else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy)) + LastLegalIntegerType = i+1; + } + } + + // Only use vector types if the vector type is larger than the integer type. + // If they are the same, use integers. + bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; + unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); + + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. + unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; + NumElem = std::min(LastLegalType, NumElem); + + if (NumElem < 2) + return false; + + // The earliest Node in the DAG. + unsigned EarliestNodeUsed = 0; + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + for (unsigned i=1; i<NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // earliest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) + EarliestNodeUsed = i; + } + + // Find if it is better to use vectors or integers to load and store + // to memory. + EVT JointMemOpVT; + if (UseVectorTy) { + JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + } else { + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + } + + DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc(); + DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc(); + + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); + SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, + FirstLoad->getChain(), + FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), + false, false, false, + FirstLoad->getAlignment()); + + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), false, false, + FirstInChain->getAlignment()); + + // Replace one of the loads with the new load. + LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); + + // Remove the rest of the load chains. + for (unsigned i = 1; i < NumElem ; ++i) { + // Replace all chain users of the old load nodes with the chain of the new + // load node. 
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); + } + + // Replace the first store with the new store. + CombineTo(EarliestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + // Remove all Store nodes. + if (StoreNodes[i].MemNode == EarliestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); + removeFromWorkList(St); + DAG.DeleteNode(St); + } + + return true; +} + +SDValue DAGCombiner::visitSTORE(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + + // If this is a store of a bit convert, store the input value if the + // resultant store does not need a higher alignment than the original. + if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && + ST->isUnindexed()) { + unsigned OrigAlign = ST->getAlignment(); + EVT SVT = Value.getOperand(0).getValueType(); + unsigned Align = TLI.getDataLayout()-> + getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); + if (Align <= OrigAlign && + ((!LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) + return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getPointerInfo(), ST->isVolatile(), + ST->isNonTemporal(), OrigAlign); + } + + // Turn 'store undef, Ptr' -> nothing. + if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) + return Chain; + + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. + if (Value.getOpcode() != ISD::TargetConstantFP) { + SDValue Tmp; + switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP type"); + case MVT::f16: // We don't do this for these yet. + case MVT::f80: + case MVT::f128: + case MVT::ppcf128: + break; + case MVT::f32: + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue(), MVT::i32); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getPointerInfo(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); + } + break; + case MVT::f64: + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), MVT::i64); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getPointerInfo(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. 
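+          // E.g. storing f64 1.0 (bits 0x3FF0000000000000) becomes i32
+          // stores of 0x00000000 at Ptr and 0x3FF00000 at Ptr+4 on a
+          // little-endian target (illustrative).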
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, + Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, + ST->getAlignment()); + Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, + DAG.getConstant(4, Ptr.getValueType())); + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, + Ptr, ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, + Alignment); + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + St0, St1); + } + + break; + } + } + } + + // Try to infer better alignment information than the store already has. + if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { + if (Align > ST->getAlignment()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, + Ptr, ST->getPointerInfo(), ST->getMemoryVT(), + ST->isVolatile(), ST->isNonTemporal(), Align); + } + } + + // Try transforming a pair floating point load / store ops to integer + // load / store ops. + SDValue NewST = TransformFPLoadStorePair(N); + if (NewST.getNode()) + return NewST; + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + SDValue ReplStore; + + // Replace the chain to avoid dependency. + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getPointerInfo(), + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); + } else { + ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, Chain, ReplStore); + + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + + // Don't add users to work list. + return CombineTo(N, Token, false); + } + } + + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + // FIXME: is there such a thing as a truncating indexed store? + if (ST->isTruncatingStore() && ST->isUnindexed() && + Value.getValueType().isInteger()) { + // See if we can simplify the input to this truncstore with knowledge that + // only the low bits are being used. For example: + // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + SDValue Shorter = + GetDemandedBits(Value, + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits())); + AddToWorkList(Value.getNode()); + if (Shorter.getNode()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, + Ptr, ST->getPointerInfo(), ST->getMemoryVT(), + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); + + // Otherwise, see if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. 
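+    // For instance, for "truncstore (and x, 0xFF), i8" only the low 8 bits
+    // are demanded, so SimplifyDemandedBits can drop the AND and store x
+    // directly (illustrative example; x is assumed to have a single use).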
+ if (SimplifyDemandedBits(Value, + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits()))) + return SDValue(N, 0); + } + + // If this is a load followed by a store to the same location, then the store + // is dead/noop. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { + if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && + ST->isUnindexed() && !ST->isVolatile() && + // There can't be any side effects between the load and store, such as + // a call or store. + Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { + // The store is dead, remove it. + return Chain; + } + } + + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a + // truncating store. We can do this even if this is already a truncstore. + if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) + && Value.getNode()->hasOneUse() && ST->isUnindexed() && + TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), + ST->getMemoryVT())) { + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getPointerInfo(), ST->getMemoryVT(), + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); + } + + // Only perform this optimization before the types are legal, because we + // don't want to perform this optimization on every DAGCombine invocation. + if (!LegalTypes) { + bool EverChanged = false; + + do { + // There can be multiple store sequences on the same chain. + // Keep trying to merge store sequences until we are unable to do so + // or until we merge the last store on the chain. + bool Changed = MergeConsecutiveStores(ST); + EverChanged |= Changed; + if (!Changed) break; + } while (ST->getOpcode() != ISD::DELETED_NODE); + + if (EverChanged) + return SDValue(N, 0); + } + + return ReduceLoadOpStoreWidth(N); +} + +SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { + SDValue InVec = N->getOperand(0); + SDValue InVal = N->getOperand(1); + SDValue EltNo = N->getOperand(2); + DebugLoc dl = N->getDebugLoc(); + + // If the inserted element is an UNDEF, just use the input vector. + if (InVal.getOpcode() == ISD::UNDEF) + return InVec; + + EVT VT = InVec.getValueType(); + + // If we can't generate a legal BUILD_VECTOR, exit + if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return SDValue(); + + // Check that we know which element is being inserted + if (!isa<ConstantSDNode>(EltNo)) + return SDValue(); + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + + // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially + // be converted to a BUILD_VECTOR). Fill in the Ops vector with the + // vector elements. + SmallVector<SDValue, 8> Ops; + if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + Ops.append(InVec.getNode()->op_begin(), + InVec.getNode()->op_end()); + } else if (InVec.getOpcode() == ISD::UNDEF) { + unsigned NElts = VT.getVectorNumElements(); + Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); + } else { + return SDValue(); + } + + // Insert the element + if (Elt < Ops.size()) { + // All the operands of BUILD_VECTOR must have the same type; + // we enforce that here. + EVT OpVT = Ops[0].getValueType(); + if (InVal.getValueType() != OpVT) + InVal = OpVT.bitsGT(InVal.getValueType()) ? 
+ DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) : + DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal); + Ops[Elt] = InVal; + } + + // Return the new vector + return DAG.getNode(ISD::BUILD_VECTOR, dl, + VT, &Ops[0], Ops.size()); +} + +SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + // (vextract (scalar_to_vector val, 0) -> val + SDValue InVec = N->getOperand(0); + EVT VT = InVec.getValueType(); + EVT NVT = N->getValueType(0); + + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // Check if the result type doesn't match the inserted element type. A + // SCALAR_TO_VECTOR may truncate the inserted element and the + // EXTRACT_VECTOR_ELT may widen the extracted vector. + SDValue InOp = InVec.getOperand(0); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); + } + return InOp; + } + + SDValue EltNo = N->getOperand(1); + bool ConstEltNo = isa<ConstantSDNode>(EltNo); + + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. + // We only perform this optimization before the op legalization phase because + // we may introduce new vector instructions which are not backed by TD + // patterns. For example on AVX, extracting elements from a wide vector + // without using extract_subvector. + if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE + && ConstEltNo && !LegalOperations) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int NumElem = VT.getVectorNumElements(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); + // Find the new index to extract from. + int OrigElt = SVOp->getMaskElt(Elt); + + // Extracting an undef index is undef. + if (OrigElt == -1) + return DAG.getUNDEF(NVT); + + // Select the right vector half to extract from. + if (OrigElt < NumElem) { + InVec = InVec->getOperand(0); + } else { + InVec = InVec->getOperand(1); + OrigElt -= NumElem; + } + + EVT IndexTy = N->getOperand(1).getValueType(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + InVec, DAG.getConstant(OrigElt, IndexTy)); + } + + // Perform only after legalization to ensure build_vector / vector_shuffle + // optimizations have already been done. + if (!LegalOperations) return SDValue(); + + // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) + + if (ConstEltNo) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + bool NewLoad = false; + bool BCNumEltsChanged = false; + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; + + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. 
+ if (!InVec.hasOneUse()) + return SDValue(); + + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + ExtVT = BCVT.getVectorElementType(); + NewLoad = true; + } + + LoadSDNode *LN0 = NULL; + const ShuffleVectorSDNode *SVN = NULL; + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast<LoadSDNode>(InVec); + } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && + InVec.getOperand(0).getValueType() == ExtVT && + ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); + } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { + // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) + // => + // (load $addr+1*size) + + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + // If the bit convert changed the number of elements, it is unsafe + // to examine the mask. + if (BCNumEltsChanged) + return SDValue(); + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); + InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); + + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + InVec = InVec.getOperand(0); + } + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast<LoadSDNode>(InVec); + Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; + } + } + + // Make sure we found a non-volatile load and the extractelement is + // the only use. + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) + return SDValue(); + + // If Idx was -1 above, Elt is going to be -1, so just return undef. + if (Elt == -1) + return DAG.getUNDEF(LVT); + + unsigned Align = LN0->getAlignment(); + if (NewLoad) { + // Check the resultant load doesn't need a higher alignment than the + // original load. + unsigned NewAlign = + TLI.getDataLayout() + ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) + return SDValue(); + + Align = NewAlign; + } + + SDValue NewPtr = LN0->getBasePtr(); + unsigned PtrOff = 0; + + if (Elt) { + PtrOff = LVT.getSizeInBits() * Elt / 8; + EVT PtrType = NewPtr.getValueType(); + if (TLI.isBigEndian()) + PtrOff = VT.getSizeInBits() / 8 - PtrOff; + NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, + DAG.getConstant(PtrOff, PtrType)); + } + + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. + SDValue Load; + SDValue Chain; + if (NVT.bitsGT(LVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) + ? 
+                      ISD::ZEXTLOAD : ISD::EXTLOAD;
+      Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
+                            NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+                            LVT, LN0->isVolatile(), LN0->isNonTemporal(), Align);
+      Chain = Load.getValue(1);
+    } else {
+      Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+                         LN0->getPointerInfo().getWithOffset(PtrOff),
+                         LN0->isVolatile(), LN0->isNonTemporal(),
+                         LN0->isInvariant(), Align);
+      Chain = Load.getValue(1);
+      if (NVT.bitsLT(LVT))
+        Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
+      else
+        Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
+    }
+    WorkListRemover DeadNodes(*this);
+    SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
+    SDValue To[] = { Load, Chain };
+    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+    // Since we're explicitly calling ReplaceAllUses, add the new node to the
+    // worklist explicitly as well.
+    AddToWorkList(Load.getNode());
+    AddUsersToWorkList(Load.getNode()); // Add users too
+    // Make sure to revisit this node to clean it up; it will usually be dead.
+    AddToWorkList(N);
+    return SDValue(N, 0);
+  }
+
+  return SDValue();
+}
+
+// Simplify (build_vec (ext )) to (bitcast (build_vec ))
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+  // We perform this optimization post type-legalization because
+  // the type-legalizer often scalarizes integer-promoted vectors.
+  // Performing this optimization before may create bit-casts which
+  // will be type-legalized to complex code sequences.
+  // We perform this optimization only before the operation legalizer because we
+  // may introduce illegal operations.
+  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+    return SDValue();
+
+  unsigned NumInScalars = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+
+  // Check to see if this is a BUILD_VECTOR of a bunch of values
+  // which come from any_extend or zero_extend nodes. If so, we can create
+  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+  // optimizations. We do not handle sign-extend because we can't fill the sign
+  // using shuffles.
+  EVT SourceType = MVT::Other;
+  bool AllAnyExt = true;
+
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+    // Ignore undef inputs.
+    if (In.getOpcode() == ISD::UNDEF) continue;
+
+    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
+    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+    // Abort if the element is not an extension.
+    if (!ZeroExt && !AnyExt) {
+      SourceType = MVT::Other;
+      break;
+    }
+
+    // The input is a ZeroExt or AnyExt. Check the original type.
+    EVT InTy = In.getOperand(0).getValueType();
+
+    // Check that all of the widened source types are the same.
+    if (SourceType == MVT::Other)
+      // First time.
+      SourceType = InTy;
+    else if (InTy != SourceType) {
+      // Multiple incoming types. Abort.
+      SourceType = MVT::Other;
+      break;
+    }
+
+    // Check if all of the extends are ANY_EXTENDs.
+    AllAnyExt &= AnyExt;
+  }
+
+  // In order to have valid types, all of the inputs must be extended from the
+  // same source type and all of the inputs must be any or zero extend.
+  // Scalar sizes must be a power of two.
+  EVT OutScalarTy = VT.getScalarType();
+  bool ValidTypes = SourceType != MVT::Other &&
+                    isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
+                    isPowerOf2_32(SourceType.getSizeInBits());
+
+  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+  // turn into a single shuffle instruction.
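+  // For example (illustrative, little-endian): a v4i32 build_vector of
+  // (zext i16 a), (zext i16 b), (zext i16 c), (zext i16 d) becomes a v8i16
+  // build_vector <a, 0, b, 0, c, 0, d, 0> bitcast to v4i32.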
+  if (!ValidTypes)
+    return SDValue();
+
+  bool isLE = TLI.isLittleEndian();
+  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+  assert(ElemRatio > 1 && "Invalid element size ratio");
+  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+                               DAG.getConstant(0, SourceType);
+
+  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+  // Populate the new build_vector
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue Cast = N->getOperand(i);
+    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+            Cast.getOpcode() == ISD::ZERO_EXTEND ||
+            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+    SDValue In;
+    if (Cast.getOpcode() == ISD::UNDEF)
+      In = DAG.getUNDEF(SourceType);
+    else
+      In = Cast->getOperand(0);
+    unsigned Index = isLE ? (i * ElemRatio) :
+                            (i * ElemRatio + (ElemRatio - 1));
+
+    assert(Index < Ops.size() && "Invalid index");
+    Ops[Index] = In;
+  }
+
+  // The type of the new BUILD_VECTOR node.
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+         "Invalid vector size");
+  // Check if the new vector type is legal.
+  if (!isTypeLegal(VecVT)) return SDValue();
+
+  // Make the new BUILD_VECTOR.
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+
+  // The new BUILD_VECTOR node has the potential to be further optimized.
+  AddToWorkList(BV.getNode());
+  // Bitcast to the desired type.
+  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  unsigned NumInScalars = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT SrcVT = MVT::Other;
+  unsigned Opcode = ISD::DELETED_NODE;
+  unsigned NumDefs = 0;
+
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opc = In.getOpcode();
+
+    if (Opc == ISD::UNDEF)
+      continue;
+
+    // Record the opcode if all scalar values are floats converted from
+    // integers.
+    if (Opcode == ISD::DELETED_NODE &&
+        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+      Opcode = Opc;
+    }
+
+    if (Opc != Opcode)
+      return SDValue();
+
+    EVT InVT = In.getOperand(0).getValueType();
+
+    // If the scalar values are typed differently, bail out. Requiring a
+    // single source type keeps the BUILD_VECTOR handling simple.
+    if (SrcVT == MVT::Other)
+      SrcVT = InVT;
+    if (SrcVT != InVT)
+      return SDValue();
+    NumDefs++;
+  }
+
+  // If the vector has just one element defined, it's not worth folding it
+  // into a vectorized one.
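+  // When it succeeds, the fold looks like (illustrative):
+  //   (build_vector (sint_to_fp a), (sint_to_fp b), undef, (sint_to_fp c))
+  //     -> (sint_to_fp (v4i32 build_vector a, b, undef, c))
+  // assuming the vectorized sint_to_fp is legal or custom for the new type.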
+  if (NumDefs < 2)
+    return SDValue();
+
+  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
+         && "Should only handle conversion from integer to float.");
+  assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+
+  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
+    return SDValue();
+
+  SmallVector<SDValue, 8> Opnds;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue In = N->getOperand(i);
+
+    if (In.getOpcode() == ISD::UNDEF)
+      Opnds.push_back(DAG.getUNDEF(SrcVT));
+    else
+      Opnds.push_back(In.getOperand(0));
+  }
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+                           &Opnds[0], Opnds.size());
+  AddToWorkList(BV.getNode());
+
+  return DAG.getNode(Opcode, dl, VT, BV);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+  unsigned NumInScalars = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+
+  // A vector built entirely of undefs is undef.
+  if (ISD::allOperandsUndef(N))
+    return DAG.getUNDEF(VT);
+
+  SDValue V = reduceBuildVecExtToExtBuildVec(N);
+  if (V.getNode())
+    return V;
+
+  V = reduceBuildVecConvertToConvertBuildVec(N);
+  if (V.getNode())
+    return V;
+
+  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+  // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+  // at most two distinct vectors, turn this into a shuffle node.
+
+  // May only combine to shuffle after legalize if shuffle is legal.
+  if (LegalOperations &&
+      !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+    return SDValue();
+
+  SDValue VecIn1, VecIn2;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    // Ignore undef inputs.
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+    // If this input is something other than an EXTRACT_VECTOR_ELT with a
+    // constant index, bail out.
+    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+
+    // We allow up to two distinct input vectors.
+    SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+      continue;
+
+    if (VecIn1.getNode() == 0) {
+      VecIn1 = ExtractedFromVec;
+    } else if (VecIn2.getNode() == 0) {
+      VecIn2 = ExtractedFromVec;
+    } else {
+      // Too many inputs.
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+  }
+
+  // If everything is good, we can make a shuffle operation.
+  if (VecIn1.getNode()) {
+    SmallVector<int, 8> Mask;
+    for (unsigned i = 0; i != NumInScalars; ++i) {
+      if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+        Mask.push_back(-1);
+        continue;
+      }
+
+      // If extracting from the first vector, just use the index directly.
+      SDValue Extract = N->getOperand(i);
+      SDValue ExtVal = Extract.getOperand(1);
+      if (Extract.getOperand(0) == VecIn1) {
+        unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+        if (ExtIndex > VT.getVectorNumElements())
+          return SDValue();
+
+        Mask.push_back(ExtIndex);
+        continue;
+      }
+
+      // Otherwise, use InIdx + VecSize
+      unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+      Mask.push_back(Idx+NumInScalars);
+    }
+
+    // We can't generate a shuffle node with mismatched input and output types.
+    // Attempt to transform a single input vector to the correct type.
+    if (VT != VecIn1.getValueType()) {
+      // We don't support shuffling between two values of different types.
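+      // A single input can still be widened (illustrative): if the extracts
+      // come from a v2f32 A but the result is v4f32, A is first widened to
+      // v4f32 via (concat_vectors A, undef) and then shuffled.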
+ if (VecIn2.getNode() != 0) + return SDValue(); + + // We only support widening of vectors which are half the size of the + // output registers. For example XMM->YMM widening on X86 with AVX. + if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) + return SDValue(); + + // If the input vector type has a different base type to the output + // vector type, bail out. + if (VecIn1.getValueType().getVectorElementType() != + VT.getVectorElementType()) + return SDValue(); + + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } + + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + + // Check that we were able to transform all incoming values to the same + // type. + if (VecIn2.getValueType() != VecIn1.getValueType() || + VecIn1.getValueType() != VT) + return SDValue(); + + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. + if (!isTypeLegal(VT)) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + SDValue Ops[2]; + Ops[0] = VecIn1; + Ops[1] = VecIn2; + return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { + // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of + // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector + // inputs come from at most two distinct vectors, turn this into a shuffle + // node. + + // If we only have one input vector, we don't need to do any concatenation. + if (N->getNumOperands() == 1) + return N->getOperand(0); + + // Check if all of the operands are undefs. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(N->getValueType(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { + EVT NVT = N->getValueType(0); + SDValue V = N->getOperand(0); + + if (V->getOpcode() == ISD::CONCAT_VECTORS) { + // Combine: + // (extract_subvec (concat V1, V2, ...), i) + // Into: + // Vi if possible + // Only operand 0 is checked as 'concat' assumes all inputs of the same type. + if (V->getOperand(0).getValueType() != NVT) + return SDValue(); + unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned NumElems = NVT.getVectorNumElements(); + assert((Idx % NumElems) == 0 && + "IDX in concat is not a multiple of the result vector length."); + return V->getOperand(Idx / NumElems); + } + + // Skip bitcasting + if (V->getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + + if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { + DebugLoc dl = N->getDebugLoc(); + // Handle only simple case where vector being inserted and vector + // being extracted are of same type, and are half size of larger vectors. + EVT BigVT = V->getOperand(0).getValueType(); + EVT SmallVT = V->getOperand(1).getValueType(); + if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) + return SDValue(); + + // Only handle cases where both indexes are constants with the same type. 
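+    // For example (illustrative, both subvector types v2f64 inside v4f64):
+    //   (extract_subvec (insert_subvec V1, V2, 2), 2) -> V2
+    // because the insert and the extract cover the same bit range.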
+  ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+
+  if (InsIdx && ExtIdx &&
+      InsIdx->getValueType(0).getSizeInBits() <= 64 &&
+      ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+    // Combine:
+    //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+    // Into:
+    //    indices are equal or bit offsets are equal => V2
+    //    otherwise => (extract_subvec V1, ExtIdx)
+    if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
+        ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+      return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
+                       DAG.getNode(ISD::BITCAST, dl,
+                                   N->getOperand(0).getValueType(),
+                                   V->getOperand(0)), N->getOperand(1));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+  // Canonicalize shuffle undef, undef -> undef
+  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+    return DAG.getUNDEF(VT);
+
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+  // Canonicalize shuffle v, v -> v, undef
+  if (N0 == N1) {
+    SmallVector<int, 8> NewMask;
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      if (Idx >= (int)NumElts) Idx -= NumElts;
+      NewMask.push_back(Idx);
+    }
+    return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT),
+                                &NewMask[0]);
+  }
+
+  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+  if (N0.getOpcode() == ISD::UNDEF) {
+    SmallVector<int, 8> NewMask;
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      if (Idx >= 0) {
+        if (Idx < (int)NumElts)
+          Idx += NumElts;
+        else
+          Idx -= NumElts;
+      }
+      NewMask.push_back(Idx);
+    }
+    return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
+                                &NewMask[0]);
+  }
+
+  // Remove references to rhs if it is undef
+  if (N1.getOpcode() == ISD::UNDEF) {
+    bool Changed = false;
+    SmallVector<int, 8> NewMask;
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      if (Idx >= (int)NumElts) {
+        Idx = -1;
+        Changed = true;
+      }
+      NewMask.push_back(Idx);
+    }
+    if (Changed)
+      return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
+  }
+
+  // If it is a splat, check if the argument vector is another splat or a
+  // build_vector with all scalar elements the same.
+  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
+    SDNode *V = N0.getNode();
+
+    // If this is a bit convert that changes the element type of the vector but
+    // not the number of vector elements, look through it. Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+ if (V->getOpcode() == ISD::BITCAST) { + SDValue ConvInput = V->getOperand(0); + if (ConvInput.getValueType().isVector() && + ConvInput.getValueType().getVectorNumElements() == NumElts) + V = ConvInput.getNode(); + } + + if (V->getOpcode() == ISD::BUILD_VECTOR) { + assert(V->getNumOperands() == NumElts && + "BUILD_VECTOR has wrong number of operands"); + SDValue Base; + bool AllSame = true; + for (unsigned i = 0; i != NumElts; ++i) { + if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + Base = V->getOperand(i); + break; + } + } + // Splat of <u, u, u, u>, return <u, u, u, u> + if (!Base.getNode()) + return N0; + for (unsigned i = 0; i != NumElts; ++i) { + if (V->getOperand(i) != Base) { + AllSame = false; + break; + } + } + // Splat of <x, x, x, x>, return <x, x, x, x> + if (AllSame) + return N0; + } + } + + // If this shuffle node is simply a swizzle of another shuffle node, + // and it reverses the swizzle of the previous shuffle then we can + // optimize shuffle(shuffle(x, undef), undef) -> x. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N1.getOpcode() == ISD::UNDEF) { + + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + + // Shuffle nodes can only reverse shuffles with a single non-undef value. + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + assert(Idx < (int)NumElts && "Index references undef operand"); + // Next, this index comes from the first value, which is the incoming + // shuffle. Adopt the incoming index. + if (Idx >= 0) + Idx = OtherSV->getMaskElt(Idx); + + // The combined shuffle must map each index to itself. 
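+      // Illustrative example: composing two <2,3,0,1> swizzles of a
+      // 4-element vector yields the identity mapping, so
+      // shuffle(shuffle(x, undef, <2,3,0,1>), undef, <2,3,0,1>) -> x.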
+ if (Idx >= 0 && (unsigned)Idx != i) + return SDValue(); + } + + return OtherSV->getOperand(0); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { + if (!TLI.getShouldFoldAtomicFences()) + return SDValue(); + + SDValue atomic = N->getOperand(0); + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + break; + default: + return SDValue(); + } + + SDValue fence = atomic.getOperand(0); + if (fence.getOpcode() != ISD::MEMBARRIER) + return SDValue(); + + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), + fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2), + atomic.getOperand(3)), atomic.getResNo()); + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), + fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2)), + atomic.getResNo()); + default: + return SDValue(); + } +} + +/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform +/// an AND to a vector_shuffle with the destination vector and a zero vector. +/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> +/// vector_shuffle V, Zero, <0, 4, 2, 4> +SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (N->getOpcode() == ISD::AND) { + if (RHS.getOpcode() == ISD::BITCAST) + RHS = RHS.getOperand(0); + if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<int, 8> Indices; + unsigned NumElts = RHS.getNumOperands(); + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Elt = RHS.getOperand(i); + if (!isa<ConstantSDNode>(Elt)) + return SDValue(); + + if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + Indices.push_back(i); + else if (cast<ConstantSDNode>(Elt)->isNullValue()) + Indices.push_back(NumElts); + else + return SDValue(); + } + + // Let's see if the target supports this vector_shuffle. + EVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + EVT EltVT = RVT.getVectorElementType(); + SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, EltVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + RVT, &ZeroOps[0], ZeroOps.size()); + LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); + } + } + + return SDValue(); +} + +/// SimplifyVBinOp - Visit a binary vector operation, like ADD. 
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+  assert(N->getValueType(0).isVector() &&
+         "SimplifyVBinOp only works on vectors!");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue Shuffle = XformToShuffleWithZero(N);
+  if (Shuffle.getNode()) return Shuffle;
+
+  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+  // this operation.
+  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+      RHS.getOpcode() == ISD::BUILD_VECTOR) {
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+      SDValue LHSOp = LHS.getOperand(i);
+      SDValue RHSOp = RHS.getOperand(i);
+      // If these two elements can't be folded, bail out.
+      if ((LHSOp.getOpcode() != ISD::UNDEF &&
+           LHSOp.getOpcode() != ISD::Constant &&
+           LHSOp.getOpcode() != ISD::ConstantFP) ||
+          (RHSOp.getOpcode() != ISD::UNDEF &&
+           RHSOp.getOpcode() != ISD::Constant &&
+           RHSOp.getOpcode() != ISD::ConstantFP))
+        break;
+
+      // Can't fold divide by zero.
+      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+          N->getOpcode() == ISD::FDIV) {
+        if ((RHSOp.getOpcode() == ISD::Constant &&
+             cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
+            (RHSOp.getOpcode() == ISD::ConstantFP &&
+             cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+          break;
+      }
+
+      EVT VT = LHSOp.getValueType();
+      EVT RVT = RHSOp.getValueType();
+      if (RVT != VT) {
+        // Integer BUILD_VECTOR operands may have types larger than the element
+        // size (e.g., when the element type is not legal). Prior to type
+        // legalization, the types may not match between the two BUILD_VECTORS.
+        // Truncate one of the operands to make them match.
+        if (RVT.getSizeInBits() > VT.getSizeInBits()) {
+          RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp);
+        } else {
+          LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp);
+          VT = RVT;
+        }
+      }
+      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
+                                   LHSOp, RHSOp);
+      if (FoldOp.getOpcode() != ISD::UNDEF &&
+          FoldOp.getOpcode() != ISD::Constant &&
+          FoldOp.getOpcode() != ISD::ConstantFP)
+        break;
+      Ops.push_back(FoldOp);
+      AddToWorkList(FoldOp.getNode());
+    }
+
+    if (Ops.size() == LHS.getNumOperands())
+      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                         LHS.getValueType(), &Ops[0], Ops.size());
+  }
+
+  return SDValue();
+}
+
+/// SimplifyVUnaryOp - Visit a unary vector operation, like FABS/FNEG.
+SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
+  assert(N->getValueType(0).isVector() &&
+         "SimplifyVUnaryOp only works on vectors!");
+
+  SDValue N0 = N->getOperand(0);
+
+  if (N0.getOpcode() != ISD::BUILD_VECTOR)
+    return SDValue();
+
+  // Operand is a BUILD_VECTOR node, see if we can constant fold it.
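+  // For example (illustrative): (fneg (build_vector 1.0, -2.0)) constant
+  // folds element-wise to (build_vector -1.0, 2.0).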
+ SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { + SDValue Op = N0.getOperand(i); + if (Op.getOpcode() != ISD::UNDEF && + Op.getOpcode() != ISD::ConstantFP) + break; + EVT EltVT = Op.getValueType(); + SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op); + if (FoldOp.getOpcode() != ISD::UNDEF && + FoldOp.getOpcode() != ISD::ConstantFP) + break; + Ops.push_back(FoldOp); + AddToWorkList(FoldOp.getNode()); + } + + if (Ops.size() != N0.getNumOperands()) + return SDValue(); + + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + N0.getValueType(), &Ops[0], Ops.size()); +} + +SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, + SDValue N1, SDValue N2){ + assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); + + SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + + // If we got a simplified select_cc node back from SimplifySelectCC, then + // break it down into a new SETCC node, and a new SELECT node, and then return + // the SELECT node, since we were called with a SELECT node. + if (SCC.getNode()) { + // Check to see if we got a select_cc back (to turn into setcc/select). + // Otherwise, just return whatever node we got back, like fabs. + if (SCC.getOpcode() == ISD::SELECT_CC) { + SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), + N0.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), + SCC.getOperand(4)); + AddToWorkList(SETCC.getNode()); + return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); + } + + return SCC; + } + return SDValue(); +} + +/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS +/// are the two values being selected between, see if we can simplify the +/// select. Callers of this should assume that TheSelect is deleted if this +/// returns true. As such, they should return the appropriate thing (e.g. the +/// node) back to the top-level of the DAG combiner loop to avoid it being +/// looked at. +bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, + SDValue RHS) { + + // Cannot simplify select with vector condition + if (TheSelect->getOperand(0).getValueType().isVector()) return false; + + // If this is a select from two identical things, try to pull the operation + // through the select. + if (LHS.getOpcode() != RHS.getOpcode() || + !LHS.hasOneUse() || !RHS.hasOneUse()) + return false; + + // If this is a load and the token chain is identical, replace the select + // of two loads with a load through a select of the address to load from. + // This triggers in things like "select bool X, 10.0, 123.0" after the FP + // constants have been dropped into the constant pool. + if (LHS.getOpcode() == ISD::LOAD) { + LoadSDNode *LLD = cast<LoadSDNode>(LHS); + LoadSDNode *RLD = cast<LoadSDNode>(RHS); + + // Token chains must be identical. + if (LHS.getOperand(0) != RHS.getOperand(0) || + // Do not let this transformation reduce the number of volatile loads. + LLD->isVolatile() || RLD->isVolatile() || + // If this is an EXTLOAD, the VT's must match. + LLD->getMemoryVT() != RLD->getMemoryVT() || + // If this is an EXTLOAD, the kind of extension must match. + (LLD->getExtensionType() != RLD->getExtensionType() && + // The only exception is if one of the extensions is anyext. 
+ LLD->getExtensionType() != ISD::EXTLOAD && + RLD->getExtensionType() != ISD::EXTLOAD) || + // FIXME: this discards src value information. This is + // over-conservative. It would be beneficial to be able to remember + // both potential memory locations. Since we are discarding + // src value info, don't do the transformation if the memory + // locations are not in the default address space. + LLD->getPointerInfo().getAddrSpace() != 0 || + RLD->getPointerInfo().getAddrSpace() != 0 || + !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), + LLD->getBasePtr().getValueType())) + return false; + + // Check that the select condition doesn't reach either load. If so, + // folding this will induce a cycle into the DAG. If not, this is safe to + // xform, so create a select of the addresses. + SDValue Addr; + if (TheSelect->getOpcode() == ISD::SELECT) { + SDNode *CondNode = TheSelect->getOperand(0).getNode(); + if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || + (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) + return false; + // The loads must not depend on one another. + if (LLD->isPredecessorOf(RLD) || + RLD->isPredecessorOf(LLD)) + return false; + Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } else { // Otherwise SELECT_CC + SDNode *CondLHS = TheSelect->getOperand(0).getNode(); + SDNode *CondRHS = TheSelect->getOperand(1).getNode(); + + if ((LLD->hasAnyUseOfValue(1) && + (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || + (RLD->hasAnyUseOfValue(1) && + (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) + return false; + + Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + + SDValue Load; + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { + Load = DAG.getLoad(TheSelect->getValueType(0), + TheSelect->getDebugLoc(), + // FIXME: Discards pointer info. + LLD->getChain(), Addr, MachinePointerInfo(), + LLD->isVolatile(), LLD->isNonTemporal(), + LLD->isInvariant(), LLD->getAlignment()); + } else { + Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? + RLD->getExtensionType() : LLD->getExtensionType(), + TheSelect->getDebugLoc(), + TheSelect->getValueType(0), + // FIXME: Discards pointer info. + LLD->getChain(), Addr, MachinePointerInfo(), + LLD->getMemoryVT(), LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); + } + + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. We know the + // old-load value is dead now. + CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); + CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); + return true; + } + + return false; +} + +/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 +/// where 'cond' is the comparison specified by CC. +SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, + ISD::CondCode CC, bool NotExtCompare) { + // (x ? y : y) -> y. 
+  if (N2 == N3) return N2;
+
+  EVT VT = N2.getValueType();
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+  // Determine if the condition we're dealing with is constant
+  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+                              N0, N1, CC, DL, false);
+  if (SCC.getNode()) AddToWorkList(SCC.getNode());
+  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+  // fold select_cc true, x, y -> x
+  if (SCCC && !SCCC->isNullValue())
+    return N2;
+  // fold select_cc false, x, y -> y
+  if (SCCC && SCCC->isNullValue())
+    return N3;
+
+  // Check to see if we can simplify the select into an fabs node
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+    // Allow either -0.0 or 0.0
+    if (CFP->getValueAPF().isZero()) {
+      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+          N2 == N3.getOperand(0))
+        return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+          N2.getOperand(0) == N3)
+        return DAG.getNode(ISD::FABS, DL, VT, N3);
+    }
+  }
+
+  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
+  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+  // in it. This is a win when the constant is not otherwise available because
+  // it replaces two constant pool loads with one. We only do this if the FP
+  // type is known to be legal, because if it isn't, then we are before legalize
+  // types and we want the other legalization to happen first (e.g. to avoid
+  // messing with soft float) and if the ConstantFP is not legal, because if
+  // it is legal, we may not need to store the FP constant in a constant pool.
+  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+      if (TLI.isTypeLegal(N2.getValueType()) &&
+          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+           TargetLowering::Legal) &&
+          // If both constants have multiple uses, then we won't need to do an
+          // extra load, they are likely around in registers for other users.
+          (TV->hasOneUse() || FV->hasOneUse())) {
+        Constant *Elts[] = {
+          const_cast<ConstantFP*>(FV->getConstantFPValue()),
+          const_cast<ConstantFP*>(TV->getConstantFPValue())
+        };
+        Type *FPTy = Elts[0]->getType();
+        const DataLayout &TD = *TLI.getDataLayout();
+
+        // Create a ConstantArray of the two constants.
+        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+                                            TD.getPrefTypeAlignment(FPTy));
+        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+        // Get the offsets to the 0 and 1 element of the array so that we can
+        // select between them.
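+        // Illustrative example for f32 (EltSize == 4): the pool entry holds
+        // { FalseVal, TrueVal }, so a true condition selects offset 4 and a
+        // false condition selects offset 0.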
+        SDValue Zero = DAG.getIntPtrConstant(0);
+        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+        SDValue One = DAG.getIntPtrConstant(EltSize);
+
+        SDValue Cond = DAG.getSetCC(DL,
+                                    TLI.getSetCCResultType(N0.getValueType()),
+                                    N0, N1, CC);
+        AddToWorkList(Cond.getNode());
+        SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+                                        Cond, One, Zero);
+        AddToWorkList(CstOffset.getNode());
+        CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+                            CstOffset);
+        AddToWorkList(CPIdx.getNode());
+        return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+                           MachinePointerInfo::getConstantPool(), false,
+                           false, false, Alignment);
+
+      }
+    }
+
+  // Check to see if we can perform the "gzip trick", transforming
+  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
+  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+      (N1C->isNullValue() ||                         // (a < 0) ? b : 0
+       (N1C->getAPIntValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
+    EVT XType = N0.getValueType();
+    EVT AType = N2.getValueType();
+    if (XType.bitsGE(AType)) {
+      // (and (sra X, size(X)-1), A) -> "and (srl X, C2), A" iff A is a
+      // single-bit constant.
+      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+        unsigned ShCtV = N2C->getAPIntValue().logBase2();
+        ShCtV = XType.getSizeInBits()-ShCtV-1;
+        SDValue ShCt = DAG.getConstant(ShCtV,
+                                       getShiftAmountTy(N0.getValueType()));
+        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+                                    XType, N0, ShCt);
+        AddToWorkList(Shift.getNode());
+
+        if (XType.bitsGT(AType)) {
+          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+          AddToWorkList(Shift.getNode());
+        }
+
+        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+      }
+
+      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+                                  XType, N0,
+                                  DAG.getConstant(XType.getSizeInBits()-1,
+                                         getShiftAmountTy(N0.getValueType())));
+      AddToWorkList(Shift.getNode());
+
+      if (XType.bitsGT(AType)) {
+        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+        AddToWorkList(Shift.getNode());
+      }
+
+      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+    }
+  }
+
+  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+  // where y has a single bit set.
+  // A plaintext description would be: we can turn the SELECT_CC into an AND
+  // when the condition can be materialized as an all-ones register. Any
+  // single bit-test can be materialized as an all-ones register with
+  // shift-left and shift-right-arith.
+  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+      N0->getValueType(0) == VT &&
+      N1C && N1C->isNullValue() &&
+      N2C && N2C->isNullValue()) {
+    SDValue AndLHS = N0->getOperand(0);
+    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+      // Shift the tested bit over the sign bit.
+      APInt AndMask = ConstAndRHS->getAPIntValue();
+      SDValue ShlAmt =
+        DAG.getConstant(AndMask.countLeadingZeros(),
+                        getShiftAmountTy(AndLHS.getValueType()));
+      SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+      // Now arithmetic right shift it all the way over, so the result is
+      // either all-ones, or zero.
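+      // Worked example (illustrative, i32): for a mask of 0x4 the shift
+      // amounts are shl 29 and sra 31, leaving 0xFFFFFFFF when bit 2 of x
+      // is set and 0 otherwise, which is then ANDed with A.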
+      SDValue ShrAmt =
+        DAG.getConstant(AndMask.getBitWidth()-1,
+                        getShiftAmountTy(Shl.getValueType()));
+      SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+    }
+  }
+
+  // fold select C, 16, 0 -> shl C, 4
+  if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+      TLI.getBooleanContents(N0.getValueType().isVector()) ==
+      TargetLowering::ZeroOrOneBooleanContent) {
+
+    // If the caller doesn't want us to simplify this into a zext of a compare,
+    // don't do it.
+    if (NotExtCompare && N2C->getAPIntValue() == 1)
+      return SDValue();
+
+    // Get a SetCC of the condition
+    // NOTE: Don't create a SETCC if it's not legal on this target.
+    if (!LegalOperations ||
+        TLI.isOperationLegal(ISD::SETCC,
+          LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) {
+      SDValue Temp, SCC;
+      // cast from setcc result type to select result type
+      if (LegalTypes) {
+        SCC  = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+                            N0, N1, CC);
+        if (N2.getValueType().bitsLT(SCC.getValueType()))
+          Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(),
+                                        N2.getValueType());
+        else
+          Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+                             N2.getValueType(), SCC);
+      } else {
+        SCC  = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+                           N2.getValueType(), SCC);
+      }
+
+      AddToWorkList(SCC.getNode());
+      AddToWorkList(Temp.getNode());
+
+      if (N2C->getAPIntValue() == 1)
+        return Temp;
+
+      // shl setcc result by log2 n2c
+      return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+                         DAG.getConstant(N2C->getAPIntValue().logBase2(),
+                                         getShiftAmountTy(Temp.getValueType())));
+    }
+  }
+
+  // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal; otherwise, go
+  // ahead with the folds.
+  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+    EVT XType = N0.getValueType();
+    if (!LegalOperations ||
+        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+      SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+      if (Res.getValueType() != VT)
+        Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+      return Res;
+    }
+
+    // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
+    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+        (!LegalOperations ||
+         TLI.isOperationLegal(ISD::CTLZ, XType))) {
+      SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+      return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+                         DAG.getConstant(Log2_32(XType.getSizeInBits()),
+                                       getShiftAmountTy(Ctlz.getValueType())));
+    }
+    // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+      SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+                                  XType, DAG.getConstant(0, XType), N0);
+      SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+      return DAG.getNode(ISD::SRL, DL, XType,
+                         DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+                         DAG.getConstant(XType.getSizeInBits()-1,
+                                         getShiftAmountTy(XType)));
+    }
+    // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+      SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+                                 DAG.getConstant(XType.getSizeInBits()-1,
+                                         getShiftAmountTy(N0.getValueType())));
+      return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+    }
+  }
+
+  // Check to see if this is an integer abs.
+  // select_cc setg[te] X,  0,  X, -X ->
+  // select_cc setgt    X, -1,  X, -X ->
+  // select_cc setl[te] X,  0, -X,  X ->
+  // select_cc setlt    X,  1, -X,  X ->
+  //   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N1C) {
+    ConstantSDNode *SubC = NULL;
+    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
+        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
+      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
+    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
+              (N1C->isOne() && CC == ISD::SETLT)) &&
+             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
+      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+
+    EVT XType = N0.getValueType();
+    if (SubC && SubC->isNullValue() && XType.isInteger()) {
+      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+                                  N0,
+                                  DAG.getConstant(XType.getSizeInBits()-1,
+                                         getShiftAmountTy(N0.getValueType())));
+      SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+                                XType, N0, Shift);
+      AddToWorkList(Shift.getNode());
+      AddToWorkList(Add.getNode());
+      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+    }
+  }
+
+  return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
+                                   SDValue N1, ISD::CondCode Cond,
+                                   DebugLoc DL, bool foldBooleans) {
+  TargetLowering::DAGCombinerInfo
+    DagCombineInfo(DAG, Level, false, this);
+  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+/// to alias with anything but itself. Provides base object and offset as
+/// results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+                           const GlobalValue *&GV, const void *&CV) {
+  // Assume it is a primitive operation.
+  Base = Ptr; Offset = 0; GV = 0; CV = 0;
+
+  // If it's adding a simple constant then integrate the offset.
+  if (Base.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+      Base = Base.getOperand(0);
+      Offset += C->getZExtValue();
+    }
+  }
+
+  // Return the underlying GlobalValue, and update the Offset. Return false
+  // for GlobalAddressSDNode since the same GlobalAddress may be represented
+  // by multiple nodes with different offsets.
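+  // For example (illustrative): two GlobalAddress nodes for @g with offsets
+  // 0 and 4 both report GV == @g; returning false makes the caller compare
+  // the offsets rather than assume the bases are distinct.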
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+    GV = G->getGlobal();
+    Offset += G->getOffset();
+    return false;
+  }
+
+  // Return the underlying Constant value, and update the Offset. Return false
+  // for ConstantSDNodes since the same constant pool entry may be represented
+  // by multiple nodes with different offsets.
+  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+    CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
+                                         : (const void *)C->getConstVal();
+    Offset += C->getOffset();
+    return false;
+  }
+  // If it's any of the following then it can't alias with anything but itself.
+  return isa<FrameIndexSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+                          const Value *SrcValue1, int SrcValueOffset1,
+                          unsigned SrcValueAlign1,
+                          const MDNode *TBAAInfo1,
+                          SDValue Ptr2, int64_t Size2,
+                          const Value *SrcValue2, int SrcValueOffset2,
+                          unsigned SrcValueAlign2,
+                          const MDNode *TBAAInfo2) const {
+  // If they are the same then they must be aliases.
+  if (Ptr1 == Ptr2) return true;
+
+  // Gather base node and offset information.
+  SDValue Base1, Base2;
+  int64_t Offset1, Offset2;
+  const GlobalValue *GV1, *GV2;
+  const void *CV1, *CV2;
+  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+
+  // If they have the same base address then check to see if they overlap.
+  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+  // It is possible for different frame indices to alias each other, mostly
+  // when tail call optimization reuses return address slots for arguments.
+  // To catch this case, look up the actual index of frame indices to compute
+  // the real alias relationship.
+  if (isFrameIndex1 && isFrameIndex2) {
+    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+  }
+
+  // Otherwise, if we know what the bases are, and they aren't identical, then
+  // we know they cannot alias.
+  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+    return false;
+
+  // If we know both SrcValue1 and SrcValue2 have relatively large alignment
+  // compared to the size and offset of the access, we may be able to prove
+  // they do not alias. This check is conservative for now to catch cases
+  // created by splitting vector types.
+  if ((SrcValueAlign1 == SrcValueAlign2) &&
+      (SrcValueOffset1 != SrcValueOffset2) &&
+      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+
+    // There is no overlap between these relatively aligned accesses of
+    // similar size, return no alias.
+    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+      return false;
+  }
+
+  if (CombinerGlobalAA) {
+    // Use alias analysis information.
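+    // Illustrative numbers: with SrcValueOffset1 = 0, Size1 = 4 and
+    // SrcValueOffset2 = 8, Size2 = 4, MinOffset is 0 and the query covers
+    // 4 and 12 bytes from the common base, letting AA prove no-alias for
+    // disjoint accesses.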
+ int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); + int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; + int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; + AliasAnalysis::AliasResult AAResult = + AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1), + AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2)); + if (AAResult == AliasAnalysis::NoAlias) + return false; + } + + // Otherwise we have to assume they alias. + return true; +} + +bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { + SDValue Ptr0, Ptr1; + int64_t Size0, Size1; + const Value *SrcValue0, *SrcValue1; + int SrcValueOffset0, SrcValueOffset1; + unsigned SrcValueAlign0, SrcValueAlign1; + const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; + FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0, + SrcValueAlign0, SrcTBAAInfo0); + FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1, + SrcValueAlign1, SrcTBAAInfo1); + return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0, + SrcValueAlign0, SrcTBAAInfo0, + Ptr1, Size1, SrcValue1, SrcValueOffset1, + SrcValueAlign1, SrcTBAAInfo1); +} + +/// FindAliasInfo - Extracts the relevant alias information from the memory +/// node. Returns true if the operand was a load. +bool DAGCombiner::FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign, + const MDNode *&TBAAInfo) const { + LSBaseSDNode *LS = cast<LSBaseSDNode>(N); + + Ptr = LS->getBasePtr(); + Size = LS->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LS->getSrcValue(); + SrcValueOffset = LS->getSrcValueOffset(); + SrcValueAlign = LS->getOriginalAlignment(); + TBAAInfo = LS->getTBAAInfo(); + return isa<LoadSDNode>(LS); +} + +/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, +/// looking for aliasing nodes and adding them to the Aliases vector. +void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector<SDValue, 8> &Aliases) { + SmallVector<SDValue, 8> Chains; // List of chains to visit. + SmallPtrSet<SDNode *, 16> Visited; // Visited node set. + + // Get alias information for node. + SDValue Ptr; + int64_t Size; + const Value *SrcValue; + int SrcValueOffset; + unsigned SrcValueAlign; + const MDNode *SrcTBAAInfo; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, + SrcValueAlign, SrcTBAAInfo); + + // Starting off. + Chains.push_back(OriginalChain); + unsigned Depth = 0; + + // Look at each chain and determine if it is an alias. If so, add it to the + // aliases list. If not, then continue up the chain looking for the next + // candidate. + while (!Chains.empty()) { + SDValue Chain = Chains.back(); + Chains.pop_back(); + + // For TokenFactor nodes, look at each operand and only continue up the + // chain until we find two aliases. If we've seen two aliases, assume we'll + // find more and revert to original chain since the xform is unlikely to be + // profitable. + // + // FIXME: The depth check could be made to return the last non-aliasing + // chain we found before we hit a tokenfactor rather than the original + // chain. + if (Depth > 6 || Aliases.size() == 2) { + Aliases.clear(); + Aliases.push_back(OriginalChain); + break; + } + + // Don't bother if we've been before. + if (!Visited.insert(Chain.getNode())) + continue; + + switch (Chain.getOpcode()) { + case ISD::EntryToken: + // Entry token is ideal chain operand, but handled in FindBetterChain. + break; + + case ISD::LOAD: + case ISD::STORE: { + // Get alias information for Chain. 
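+      // (Two loads never need to be ordered against each other, hence the
+      // IsLoad && IsOpLoad exemption in the check just below.)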
+ SDValue OpPtr; + int64_t OpSize; + const Value *OpSrcValue; + int OpSrcValueOffset; + unsigned OpSrcValueAlign; + const MDNode *OpSrcTBAAInfo; + bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, + OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign, + OpSrcTBAAInfo); + + // If chain is alias then stop here. + if (!(IsLoad && IsOpLoad) && + isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, + SrcTBAAInfo, + OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign, OpSrcTBAAInfo)) { + Aliases.push_back(Chain); + } else { + // Look further up the chain. + Chains.push_back(Chain.getOperand(0)); + ++Depth; + } + break; + } + + case ISD::TokenFactor: + // We have to check each of the operands of the token factor for "small" + // token factors, so we queue them up. Adding the operands to the queue + // (stack) in reverse order maintains the original order and increases the + // likelihood that getNode will find a matching token factor (CSE.) + if (Chain.getNumOperands() > 16) { + Aliases.push_back(Chain); + break; + } + for (unsigned n = Chain.getNumOperands(); n;) + Chains.push_back(Chain.getOperand(--n)); + ++Depth; + break; + + default: + // For all other instructions we will just have to take what we can get. + Aliases.push_back(Chain); + break; + } + } +} + +/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking +/// for a better chain (aliasing node.) +SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { + SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. + + // Accumulate all the aliases to this node. + GatherAllAliases(N, OldChain, Aliases); + + // If no operands then chain to entry token. + if (Aliases.size() == 0) + return DAG.getEntryNode(); + + // If a single operand then chain to it. We don't need to revisit it. + if (Aliases.size() == 1) + return Aliases[0]; + + // Construct a custom tailored token factor. + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + &Aliases[0], Aliases.size()); +} + +// SelectionDAG::Combine - This is the entry point for the file. +// +void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, + CodeGenOpt::Level OptLevel) { + /// run - This is the main entry point to this class. + /// + DAGCombiner(*this, AA, OptLevel).Run(Level); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp new file mode 100644 index 0000000..9ac738e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -0,0 +1,1507 @@ +//===-- FastISel.cpp - Implementation of the FastISel class ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the FastISel class. +// +// "Fast" instruction selection is designed to emit very poor code quickly. +// Also, it is not designed to be able to do much lowering, so most illegal +// types (e.g. i64 on 32-bit targets) and operations are not supported. It is +// also not intended to be able to do much optimization, except in a few cases +// where doing optimizations reduces overall compile time. For example, folding +// constants into immediate fields is often done, because it's cheap and it +// reduces the number of instructions later phases have to examine. 
+// +// "Fast" instruction selection is able to fail gracefully and transfer +// control to the SelectionDAG selector for operations that it doesn't +// support. In many cases, this allows us to avoid duplicating a lot of +// the complicated lowering logic that SelectionDAG currently has. +// +// The intended use for "fast" instruction selection is "-O0" mode +// compilation, where the quality of the generated code is irrelevant when +// weighed against the speed at which the code can be generated. Also, +// at -O0, the LLVM optimizers are not running, and this makes the +// compile time of codegen a much higher portion of the overall compile +// time. Despite its limitations, "fast" instruction selection is able to +// handle enough code on its own to provide noticeable overall speedups +// in -O0 compiles. +// +// Basic operations are supported in a target-independent way, by reading +// the same instruction descriptions that the SelectionDAG selector reads, +// and identifying simple arithmetic operations that can be directly selected +// from simple operators. More complicated operations currently require +// target-specific code. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " + "target-independent selector"); +STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " + "target-specific selector"); +STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); + +/// startNewBlock - Set the current block to which generated machine +/// instructions will be appended, and clear the local CSE map. +/// +void FastISel::startNewBlock() { + LocalValueMap.clear(); + + EmitStartPt = 0; + + // Advance the emit start point past any EH_LABEL instructions. + MachineBasicBlock::iterator + I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end(); + while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) { + EmitStartPt = I; + ++I; + } + LastLocalValue = EmitStartPt; +} + +bool FastISel::LowerArguments() { + if (!FuncInfo.CanLowerReturn) + // Fallback to SDISel argument lowering code to deal with sret pointer + // parameter. + return false; + + if (!FastLowerArguments()) + return false; + + // Enter non-dead arguments into ValueMap for uses in non-entry BBs. 
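+  // (FastLowerArguments() is expected to have recorded a vreg in
+  // LocalValueMap for every used formal argument; the assert below guards
+  // that expectation before the vreg is copied into the function-wide
+  // ValueMap.)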
+ for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), + E = FuncInfo.Fn->arg_end(); I != E; ++I) { + if (!I->use_empty()) { + DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); + assert(VI != LocalValueMap.end() && "Missed an argument?"); + FuncInfo.ValueMap[I] = VI->second; + } + } + return true; +} + +void FastISel::flushLocalValueMap() { + LocalValueMap.clear(); + LastLocalValue = EmitStartPt; + recomputeInsertPt(); +} + +bool FastISel::hasTrivialKill(const Value *V) const { + // Don't consider constants or arguments to have trivial kills. + const Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + // No-op casts are trivially coalesced by fast-isel. + if (const CastInst *Cast = dyn_cast<CastInst>(I)) + if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) && + !hasTrivialKill(Cast->getOperand(0))) + return false; + + // GEPs with all zero indices are trivially coalesced by fast-isel. + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) + return false; + + // Only instructions with a single use in the same basic block are considered + // to have trivial kills. + return I->hasOneUse() && + !(I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::PtrToInt || + I->getOpcode() == Instruction::IntToPtr) && + cast<Instruction>(*I->use_begin())->getParent() == I->getParent(); +} + +unsigned FastISel::getRegForValue(const Value *V) { + EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); + // Don't handle non-simple values in FastISel. + if (!RealVT.isSimple()) + return 0; + + // Ignore illegal types. We must do this before looking up the value + // in ValueMap because Arguments are given virtual registers regardless + // of whether FastISel can handle them. + MVT VT = RealVT.getSimpleVT(); + if (!TLI.isTypeLegal(VT)) { + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) + VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); + else + return 0; + } + + // Look up the value to see if we already have a register for it. + unsigned Reg = lookUpRegForValue(V); + if (Reg != 0) + return Reg; + + // In bottom-up mode, just create the virtual register which will be used + // to hold the value. It will be materialized later. + if (isa<Instruction>(V) && + (!isa<AllocaInst>(V) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) + return FuncInfo.InitializeRegForValue(V); + + SavePoint SaveInsertPt = enterLocalValueArea(); + + // Materialize the value in a register. Emit any instructions in the + // local value area. + Reg = materializeRegForValue(V, VT); + + leaveLocalValueArea(SaveInsertPt); + + return Reg; +} + +/// materializeRegForValue - Helper for getRegForValue. This function is +/// called when the value isn't already available in a register and must +/// be materialized with new instructions. +unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { + unsigned Reg = 0; + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getValue().getActiveBits() <= 64) + Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + } else if (isa<AllocaInst>(V)) { + Reg = TargetMaterializeAlloca(cast<AllocaInst>(V)); + } else if (isa<ConstantPointerNull>(V)) { + // Translate this as an integer zero so that it can be + // local-CSE'd with actual integer zeros. 
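+    // (For example, on a 32-bit target a null pointer and a plain i32 0
+    // reach getRegForValue as the same uniqued ConstantInt, so one vreg
+    // serves both within the block.)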
+    Reg =
+      getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+  } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+    if (CF->isNullValue()) {
+      Reg = TargetMaterializeFloatZero(CF);
+    } else {
+      // Try to emit the constant directly.
+      Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+    }
+
+    if (!Reg) {
+      // Try to emit the constant by using an integer constant with a cast.
+      const APFloat &Flt = CF->getValueAPF();
+      EVT IntVT = TLI.getPointerTy();
+
+      uint64_t x[2];
+      uint32_t IntBitWidth = IntVT.getSizeInBits();
+      bool isExact;
+      (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+                                  APFloat::rmTowardZero, &isExact);
+      if (isExact) {
+        APInt IntVal(IntBitWidth, x);
+
+        unsigned IntegerReg =
+          getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+        if (IntegerReg != 0)
+          Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+                           IntegerReg, /*Kill=*/false);
+      }
+    }
+  } else if (const Operator *Op = dyn_cast<Operator>(V)) {
+    if (!SelectOperator(Op, Op->getOpcode()))
+      if (!isa<Instruction>(Op) ||
+          !TargetSelectInstruction(cast<Instruction>(Op)))
+        return 0;
+    Reg = lookUpRegForValue(Op);
+  } else if (isa<UndefValue>(V)) {
+    Reg = createResultReg(TLI.getRegClassFor(VT));
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+  }
+
+  // If target-independent code couldn't handle the value, give target-specific
+  // code a try.
+  if (!Reg && isa<Constant>(V))
+    Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+  // Don't cache constant materializations in the general ValueMap.
+  // To do so would require tracking what uses they dominate.
+  if (Reg != 0) {
+    LocalValueMap[V] = Reg;
+    LastLocalValue = MRI.getVRegDef(Reg);
+  }
+  return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(const Value *V) {
+  // Look up the value to see if we already have a register for it. We
+  // cache values defined by Instructions across blocks, and other values
+  // only locally. This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+  DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+  if (I != FuncInfo.ValueMap.end())
+    return I->second;
+  return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value. It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+  if (!isa<Instruction>(I)) {
+    LocalValueMap[I] = Reg;
+    return;
+  }
+
+  unsigned &AssignedReg = FuncInfo.ValueMap[I];
+  if (AssignedReg == 0)
+    // Use the new register.
+    AssignedReg = Reg;
+  else if (Reg != AssignedReg) {
+    // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+    for (unsigned i = 0; i < NumRegs; i++)
+      FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
+
+    AssignedReg = Reg;
+  }
+}
+
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+  unsigned IdxN = getRegForValue(Idx);
+  if (IdxN == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return std::pair<unsigned, bool>(0, false);
+
+  bool IdxNIsKill = hasTrivialKill(Idx);
+
+  // If the index is smaller or larger than intptr_t, truncate or extend it.
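+  // (Sign extension is used for narrower indices, matching the LLVM rule
+  // that GEP indices are interpreted as signed; e.g. an i32 index of -1 on
+  // a 64-bit target must widen to all-ones, not to 0x00000000FFFFFFFF.)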
+ MVT PtrVT = TLI.getPointerTy(); + EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); + if (IdxVT.bitsLT(PtrVT)) { + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, + IdxN, IdxNIsKill); + IdxNIsKill = true; + } + else if (IdxVT.bitsGT(PtrVT)) { + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, + IdxN, IdxNIsKill); + IdxNIsKill = true; + } + return std::pair<unsigned, bool>(IdxN, IdxNIsKill); +} + +void FastISel::recomputeInsertPt() { + if (getLastLocalValue()) { + FuncInfo.InsertPt = getLastLocalValue(); + FuncInfo.MBB = FuncInfo.InsertPt->getParent(); + ++FuncInfo.InsertPt; + } else + FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); + + // Now skip past any EH_LABELs, which must remain at the beginning. + while (FuncInfo.InsertPt != FuncInfo.MBB->end() && + FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL) + ++FuncInfo.InsertPt; +} + +void FastISel::removeDeadCode(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + while (I != E) { + MachineInstr *Dead = &*I; + ++I; + Dead->eraseFromParent(); + ++NumFastIselDead; + } + recomputeInsertPt(); +} + +FastISel::SavePoint FastISel::enterLocalValueArea() { + MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; + DebugLoc OldDL = DL; + recomputeInsertPt(); + DL = DebugLoc(); + SavePoint SP = { OldInsertPt, OldDL }; + return SP; +} + +void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { + if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) + LastLocalValue = llvm::prior(FuncInfo.InsertPt); + + // Restore the previous insert position. + FuncInfo.InsertPt = OldInsertPt.InsertPt; + DL = OldInsertPt.DL; +} + +/// SelectBinaryOp - Select and emit code for a binary operator instruction, +/// which has an opcode which directly corresponds to the given ISD opcode. +/// +bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { + EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); + if (VT == MVT::Other || !VT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // We only handle legal types. For example, on x86-32 the instruction + // selector contains all of the 64-bit instructions from x86-64, + // under the assumption that i64 won't be used if the target doesn't + // support it. + if (!TLI.isTypeLegal(VT)) { + // MVT::i1 is special. Allow AND, OR, or XOR because they + // don't require additional zeroing, which makes them easy. + if (VT == MVT::i1 && + (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || + ISDOpcode == ISD::XOR)) + VT = TLI.getTypeToTransformTo(I->getContext(), VT); + else + return false; + } + + // Check if the first operand is a constant, and handle it as "ri". At -O0, + // we don't have anything that canonicalizes operand order. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0))) + if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) return false; + + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, + Op1IsKill, CI->getZExtValue(), + VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. 
+ return false; + + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + + // Check if the second operand is a constant and handle it appropriately. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint64_t Imm = CI->getZExtValue(); + + // Transform "sdiv exact X, 8" -> "sra X, 3". + if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && + cast<BinaryOperator>(I)->isExact() && + isPowerOf2_64(Imm)) { + Imm = Log2_64(Imm); + ISDOpcode = ISD::SRA; + } + + // Transform "urem x, pow2" -> "and x, pow2-1". + if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) && + isPowerOf2_64(Imm)) { + --Imm; + ISDOpcode = ISD::AND; + } + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + Op0IsKill, Imm, VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + + // Check if the second operand is a constant float. + if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { + unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op0IsKill, CF); + if (ResultReg != 0) { + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + } + + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + // Now we have both operands in registers. Emit the instruction. + unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, + Op0, Op0IsKill, + Op1, Op1IsKill); + if (ResultReg == 0) + // Target-specific code wasn't able to find a machine opcode for + // the given ISD opcode and type. Halt "fast" selection and bail. + return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectGetElementPtr(const User *I) { + unsigned N = getRegForValue(I->getOperand(0)); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + bool NIsKill = hasTrivialKill(I->getOperand(0)); + + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + // FIXME: What's a good SWAG number for MaxOffs? + uint64_t MaxOffs = 2048; + Type *Ty = I->getOperand(0)->getType(); + MVT VT = TLI.getPointerTy(); + for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, + E = I->op_end(); OI != E; ++OI) { + const Value *Idx = *OI; + if (StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->isZero()) continue; + // N = N + Offset + TotalOffs += + TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. 
Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + continue; + } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = TD.getTypeAllocSize(Ty); + std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); + unsigned IdxN = Pair.first; + bool IdxNIsKill = Pair.second; + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + if (ElementSize != 1) { + IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + IdxNIsKill = true; + } + N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, N); + return true; +} + +bool FastISel::SelectCall(const User *I) { + const CallInst *Call = cast<CallInst>(I); + + // Handle simple inline asms. + if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) { + // Don't attempt to handle constraints. + if (!IA->getConstraintString().empty()) + return false; + + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::INLINEASM)) + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo); + return true; + } + + MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); + ComputeUsesVAFloatArgument(*Call, &MMI); + + const Function *F = Call->getCalledFunction(); + if (!F) return false; + + // Handle selected intrinsic function calls. + switch (F->getIntrinsicID()) { + default: break; + // At -O0 we don't care about the lifetime intrinsics. + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // The donothing intrinsic does, well, nothing. + case Intrinsic::donothing: + return true; + + case Intrinsic::dbg_declare: { + const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); + if (!DIVariable(DI->getVariable()).Verify() || + !FuncInfo.MF->getMMI().hasDebugInfo()) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + return true; + } + + const Value *Address = DI->getAddress(); + if (!Address || isa<UndefValue>(Address)) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + return true; + } + + unsigned Reg = 0; + unsigned Offset = 0; + if (const Argument *Arg = dyn_cast<Argument>(Address)) { + // Some arguments' frame index is recorded during argument lowering. + Offset = FuncInfo.getArgumentFrameIndex(Arg); + if (Offset) + Reg = TRI.getFrameRegister(*FuncInfo.MF); + } + if (!Reg) + Reg = lookUpRegForValue(Address); + + // If we have a VLA that has a "use" in a metadata node that's then used + // here but it has no other uses, then we have a problem. E.g., + // + // int foo (const int *x) { + // char a[*x]; + // return 0; + // } + // + // If we assign 'a' a vreg and fast isel later on has to use the selection + // DAG isel, it will want to copy the value to the vreg. 
However, there are + // no uses, which goes counter to what selection DAG isel expects. + if (!Reg && !Address->use_empty() && isa<Instruction>(Address) && + (!isa<AllocaInst>(Address) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) + Reg = FuncInfo.InitializeRegForValue(Address); + + if (Reg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE)) + .addReg(Reg, RegState::Debug).addImm(Offset) + .addMetadata(DI->getVariable()); + else + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + DEBUG(dbgs() << "Dropping debug info for " << DI); + return true; + } + case Intrinsic::dbg_value: { + // This form of DBG_VALUE is target-independent. + const DbgValueInst *DI = cast<DbgValueInst>(Call); + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const Value *V = DI->getValue(); + if (!V) { + // Currently the optimizer can produce this; insert an undef to + // help debugging. Probably the optimizer should not do this. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getBitWidth() > 64) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addCImm(CI).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addFPImm(CF).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + } else if (unsigned Reg = lookUpRegForValue(V)) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + } else { + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + DEBUG(dbgs() << "Dropping debug info for " << DI); + } + return true; + } + case Intrinsic::objectsize: { + ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); + unsigned long long Res = CI->isZero() ? -1ULL : 0; + Constant *ResCI = ConstantInt::get(Call->getType(), Res); + unsigned ResultReg = getRegForValue(ResCI); + if (ResultReg == 0) + return false; + UpdateValueMap(Call, ResultReg); + return true; + } + case Intrinsic::expect: { + unsigned ResultReg = getRegForValue(Call->getArgOperand(0)); + if (ResultReg == 0) + return false; + UpdateValueMap(Call, ResultReg); + return true; + } + } + + // Usually, it does not make sense to initialize a value, + // make an unrelated function call and use the value, because + // it tends to be spilled on the stack. So, we move the pointer + // to the last local value to the beginning of the block, so that + // all the values which have already been materialized, + // appear after the call. It also makes sense to skip intrinsics + // since they tend to be inlined. + if (!isa<IntrinsicInst>(Call)) + flushLocalValueMap(); + + // An arbitrary call. Bail. + return false; +} + +bool FastISel::SelectCast(const User *I, unsigned Opcode) { + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. 
+ return false; + + // Check if the destination type is legal. + if (!TLI.isTypeLegal(DstVT)) + return false; + + // Check if the source operand is legal. + if (!TLI.isTypeLegal(SrcVT)) + return false; + + unsigned InputReg = getRegForValue(I->getOperand(0)); + if (!InputReg) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); + + unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), + DstVT.getSimpleVT(), + Opcode, + InputReg, InputRegIsKill); + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectBitCast(const User *I) { + // If the bitcast doesn't change the type, just use the operand value. + if (I->getType() == I->getOperand(0)->getType()) { + unsigned Reg = getRegForValue(I->getOperand(0)); + if (Reg == 0) + return false; + UpdateValueMap(I, Reg); + return true; + } + + // Bitcasts of other values become reg-reg copies or BITCAST operators. + EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstEVT = TLI.getValueType(I->getType()); + if (SrcEVT == MVT::Other || DstEVT == MVT::Other || + !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT)) + // Unhandled type. Halt "fast" selection and bail. + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DstVT = DstEVT.getSimpleVT(); + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (Op0 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + + // First, try to perform the bitcast by inserting a reg-reg copy. + unsigned ResultReg = 0; + if (SrcVT == DstVT) { + const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + // Don't attempt a cross-class copy. It will likely fail. + if (SrcClass == DstClass) { + ResultReg = createResultReg(DstClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0); + } + } + + // If the reg-reg copy failed, select a BITCAST opcode. + if (!ResultReg) + ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); + + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool +FastISel::SelectInstruction(const Instruction *I) { + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. + if (isa<TerminatorInst>(I)) + if (!HandlePHINodesInSuccessorBlocks(I->getParent())) + return false; + + DL = I->getDebugLoc(); + + MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. + if (const CallInst *Call = dyn_cast<CallInst>(I)) { + const Function *F = Call->getCalledFunction(); + LibFunc::Func Func; + if (F && !F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) + return false; + } + + // First, try doing target-independent selection. + if (SelectOperator(I, I->getOpcode())) { + ++NumFastIselSuccessIndependent; + DL = DebugLoc(); + return true; + } + // Remove dead code. However, ignore call instructions since we've flushed + // the local value map and recomputed the insert point. 
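+  // (SelectCall calls flushLocalValueMap() before failing on an arbitrary
+  // call, which recomputes the insert point; after that, SavedInsertPt no
+  // longer brackets only dead instructions.)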
+ if (!isa<CallInst>(I)) { + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + } + + // Next, try calling the target to attempt to handle the instruction. + SavedInsertPt = FuncInfo.InsertPt; + if (TargetSelectInstruction(I)) { + ++NumFastIselSuccessTarget; + DL = DebugLoc(); + return true; + } + // Check for dead code and remove as necessary. + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + + DL = DebugLoc(); + return false; +} + +/// FastEmitBranch - Emit an unconditional branch to the given block, +/// unless it is the immediate (fall-through) successor, and update +/// the CFG. +void +FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { + + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && + FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + // For more accurate line information if this is the only instruction + // in the block then emit it, otherwise we have the unconditional + // fall-through case, which needs no instructions. + } else { + // The unconditional branch case. + TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, + SmallVector<MachineOperand, 0>(), DL); + } + FuncInfo.MBB->addSuccessor(MSucc); +} + +/// SelectFNeg - Emit an FNeg operation. +/// +bool +FastISel::SelectFNeg(const User *I) { + unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); + if (OpReg == 0) return false; + + bool OpRegIsKill = hasTrivialKill(I); + + // If the target has ISD::FNEG, use it. + EVT VT = TLI.getValueType(I->getType()); + unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), + ISD::FNEG, OpReg, OpRegIsKill); + if (ResultReg != 0) { + UpdateValueMap(I, ResultReg); + return true; + } + + // Bitcast the value to integer, twiddle the sign bit with xor, + // and then bitcast it back to floating-point. + if (VT.getSizeInBits() > 64) return false; + EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); + if (!TLI.isTypeLegal(IntVT)) + return false; + + unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::BITCAST, OpReg, OpRegIsKill); + if (IntReg == 0) + return false; + + unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, + IntReg, /*Kill=*/true, + UINT64_C(1) << (VT.getSizeInBits()-1), + IntVT.getSimpleVT()); + if (IntResultReg == 0) + return false; + + ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), + ISD::BITCAST, IntResultReg, /*Kill=*/true); + if (ResultReg == 0) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool +FastISel::SelectExtractValue(const User *U) { + const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U); + if (!EVI) + return false; + + // Make sure we only try to handle extracts with a legal result. But also + // allow i1 because it's easy. + EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true); + if (!RealVT.isSimple()) + return false; + MVT VT = RealVT.getSimpleVT(); + if (!TLI.isTypeLegal(VT) && VT != MVT::i1) + return false; + + const Value *Op0 = EVI->getOperand(0); + Type *AggTy = Op0->getType(); + + // Get the base result register. 
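+  // (Aggregate results occupy consecutive vregs; as an illustrative case,
+  // extracting field 1 of a {i32, i32} value yields the register one past
+  // the base, via the getNumRegisters loop below.)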
+  unsigned ResultReg;
+  DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+  if (I != FuncInfo.ValueMap.end())
+    ResultReg = I->second;
+  else if (isa<Instruction>(Op0))
+    ResultReg = FuncInfo.InitializeRegForValue(Op0);
+  else
+    return false; // fast-isel can't handle aggregate constants at the moment
+
+  // Get the actual result register, which is an offset from the base register.
+  unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
+
+  SmallVector<EVT, 4> AggValueVTs;
+  ComputeValueVTs(TLI, AggTy, AggValueVTs);
+
+  for (unsigned i = 0; i < VTIndex; i++)
+    ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+  UpdateValueMap(EVI, ResultReg);
+  return true;
+}
+
+bool
+FastISel::SelectOperator(const User *I, unsigned Opcode) {
+  switch (Opcode) {
+  case Instruction::Add:
+    return SelectBinaryOp(I, ISD::ADD);
+  case Instruction::FAdd:
+    return SelectBinaryOp(I, ISD::FADD);
+  case Instruction::Sub:
+    return SelectBinaryOp(I, ISD::SUB);
+  case Instruction::FSub:
+    // FNeg is currently represented in LLVM IR as a special case of FSub.
+    if (BinaryOperator::isFNeg(I))
+      return SelectFNeg(I);
+    return SelectBinaryOp(I, ISD::FSUB);
+  case Instruction::Mul:
+    return SelectBinaryOp(I, ISD::MUL);
+  case Instruction::FMul:
+    return SelectBinaryOp(I, ISD::FMUL);
+  case Instruction::SDiv:
+    return SelectBinaryOp(I, ISD::SDIV);
+  case Instruction::UDiv:
+    return SelectBinaryOp(I, ISD::UDIV);
+  case Instruction::FDiv:
+    return SelectBinaryOp(I, ISD::FDIV);
+  case Instruction::SRem:
+    return SelectBinaryOp(I, ISD::SREM);
+  case Instruction::URem:
+    return SelectBinaryOp(I, ISD::UREM);
+  case Instruction::FRem:
+    return SelectBinaryOp(I, ISD::FREM);
+  case Instruction::Shl:
+    return SelectBinaryOp(I, ISD::SHL);
+  case Instruction::LShr:
+    return SelectBinaryOp(I, ISD::SRL);
+  case Instruction::AShr:
+    return SelectBinaryOp(I, ISD::SRA);
+  case Instruction::And:
+    return SelectBinaryOp(I, ISD::AND);
+  case Instruction::Or:
+    return SelectBinaryOp(I, ISD::OR);
+  case Instruction::Xor:
+    return SelectBinaryOp(I, ISD::XOR);
+
+  case Instruction::GetElementPtr:
+    return SelectGetElementPtr(I);
+
+  case Instruction::Br: {
+    const BranchInst *BI = cast<BranchInst>(I);
+
+    if (BI->isUnconditional()) {
+      const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+      MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+      FastEmitBranch(MSucc, BI->getDebugLoc());
+      return true;
+    }
+
+    // Conditional branches are not handled yet.
+    // Halt "fast" selection and bail.
+    return false;
+  }
+
+  case Instruction::Unreachable:
+    // Nothing to emit.
+    return true;
+
+  case Instruction::Alloca:
+    // FunctionLowering has the static-sized case covered.
+    if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+      return true;
+
+    // Dynamic-sized alloca is not handled yet.
+    return false;
+
+  case Instruction::Call:
+    return SelectCall(I);
+
+  case Instruction::BitCast:
+    return SelectBitCast(I);
+
+  case Instruction::FPToSI:
+    return SelectCast(I, ISD::FP_TO_SINT);
+  case Instruction::ZExt:
+    return SelectCast(I, ISD::ZERO_EXTEND);
+  case Instruction::SExt:
+    return SelectCast(I, ISD::SIGN_EXTEND);
+  case Instruction::Trunc:
+    return SelectCast(I, ISD::TRUNCATE);
+  case Instruction::SIToFP:
+    return SelectCast(I, ISD::SINT_TO_FP);
+
+  case Instruction::IntToPtr: // Deliberate fall-through.
+  case Instruction::PtrToInt: {
+    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+    EVT DstVT = TLI.getValueType(I->getType());
+    if (DstVT.bitsGT(SrcVT))
+      return SelectCast(I, ISD::ZERO_EXTEND);
+    if (DstVT.bitsLT(SrcVT))
+      return SelectCast(I, ISD::TRUNCATE);
+    unsigned Reg = getRegForValue(I->getOperand(0));
+    if (Reg == 0) return false;
+    UpdateValueMap(I, Reg);
+    return true;
+  }
+
+  case Instruction::ExtractValue:
+    return SelectExtractValue(I);
+
+  case Instruction::PHI:
+    llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+  default:
+    // Unhandled instruction. Halt "fast" selection and bail.
+    return false;
+  }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &funcInfo,
+                   const TargetLibraryInfo *libInfo)
+  : FuncInfo(funcInfo),
+    MRI(FuncInfo.MF->getRegInfo()),
+    MFI(*FuncInfo.MF->getFrameInfo()),
+    MCP(*FuncInfo.MF->getConstantPool()),
+    TM(FuncInfo.MF->getTarget()),
+    TD(*TM.getDataLayout()),
+    TII(*TM.getInstrInfo()),
+    TLI(*TM.getTargetLowering()),
+    TRI(*TM.getRegisterInfo()),
+    LibInfo(libInfo) {
+}
+
+FastISel::~FastISel() {}
+
+bool FastISel::FastLowerArguments() {
+  return false;
+}
+
+unsigned FastISel::FastEmit_(MVT, MVT,
+                             unsigned) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT, MVT,
+                              unsigned,
+                              unsigned /*Op0*/, bool /*Op0IsKill*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT, MVT,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
+                               unsigned /*Op1*/, bool /*Op1IsKill*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT, MVT,
+                              unsigned, const ConstantFP * /*FPImm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT, MVT,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
+                               uint64_t /*Imm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT, MVT,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
+                               const ConstantFP * /*FPImm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT, MVT,
+                                unsigned,
+                                unsigned /*Op0*/, bool /*Op0IsKill*/,
+                                unsigned /*Op1*/, bool /*Op1IsKill*/,
+                                uint64_t /*Imm*/) {
+  return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
+                                unsigned Op0, bool Op0IsKill,
+                                uint64_t Imm, MVT ImmType) {
+  // If this is a multiply by a power of two, emit this as a shift left.
+  if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+    Opcode = ISD::SHL;
+    Imm = Log2_64(Imm);
+  } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+    // div x, 8 -> srl x, 3
+    Opcode = ISD::SRL;
+    Imm = Log2_64(Imm);
+  }
+
+  // Horrible hack (to be removed), check to make sure shift amounts are
+  // in-range.
+  if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+      Imm >= VT.getSizeInBits())
+    return 0;
+
+  // First check if immediate type is legal. If not, we can't use the ri form.
+  unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+  if (ResultReg != 0)
+    return ResultReg;
+  unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+  if (MaterialReg == 0) {
+    // This is a bit ugly/slow, but failing here means falling out of
+    // fast-isel, which would be very slow.
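+    // (Going through getRegForValue gives the target a second chance to
+    // materialize the constant via TargetMaterializeConstant when
+    // FastEmit_i has no direct encoding for the immediate.)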
+ IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), + VT.getSizeInBits()); + MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); + assert (MaterialReg != 0 && "Unable to materialize imm."); + if (MaterialReg == 0) return 0; + } + return FastEmit_rr(VT, VT, Opcode, + Op0, Op0IsKill, + MaterialReg, /*Kill=*/true); +} + +unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { + return MRI.createVirtualRegister(RC); +} + +unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass* RC) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); + return ResultReg; +} + +unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + 
ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm1) + .addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm1) + .addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm1).addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + 
ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm1).addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx) { + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + const TargetRegisterClass *RC = MRI.getRegClass(Op0); + MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx); + return ResultReg; +} + +/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op +/// with all but the least significant bit set to zero. +unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); +} + +/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. +/// Emit code to ensure constants are copied into registers when needed. +/// Remember the virtual registers that need to be added to the Machine PHI +/// nodes as input. We cannot just directly add them, because expansion +/// might result in multiple MBB's for one BB. As such, the start of the +/// BB might correspond to a different MBB than the end. +bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + const BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB)) continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. + for (BasicBlock::const_iterator I = SuccBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + + // Ignore dead phi's. + if (PN->use_empty()) continue; + + // Only handle legal types. Two interesting things to note here. First, + // by bailing out early, we may leave behind some dead instructions, + // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its + // own moves. Second, this check is necessary because FastISel doesn't + // use CreateRegs to create registers, so it always creates + // exactly one register for each non-void instruction. 
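+      // (Illustrative case: an i64 PHI on a 32-bit target would need two
+      // vregs, which this path cannot provide, so such a PHI forces the
+      // bail-out below.)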
+ EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) + VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); + else { + FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + } + + const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + // Set the DebugLoc for the copy. Prefer the location of the operand + // if there is one; use the location of the PHI otherwise. + DL = PN->getDebugLoc(); + if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp)) + DL = Inst->getDebugLoc(); + + unsigned Reg = getRegForValue(PHIOp); + if (Reg == 0) { + FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + DL = DebugLoc(); + } + } + + return true; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp new file mode 100644 index 0000000..b46edad --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -0,0 +1,483 @@ +//===-- FunctionLoweringInfo.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating functions from LLVM IR into +// Machine IR. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "function-lowering-info" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +using namespace llvm; + +/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by +/// PHI nodes or outside of the basic block that defines it, or used by a +/// switch or atomic instruction, which may expand to multiple basic blocks. 
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { + if (I->use_empty()) return false; + if (isa<PHINode>(I)) return true; + const BasicBlock *BB = I->getParent(); + for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + const User *U = *UI; + if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U)) + return true; + } + return false; +} + +FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) + : TLI(tli) { +} + +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { + Fn = &fn; + MF = &mf; + RegInfo = &MF->getRegInfo(); + + // Check whether the function can return without sret-demotion. + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, TLI); + CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, + Fn->isVarArg(), + Outs, Fn->getContext()); + + // Initialize the mapping of values to registers. This is only set up for + // instruction values that are used outside of the block that defines + // them. + Function::const_iterator BB = Fn->begin(), EB = Fn->end(); + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { + Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + + // The object may need to be placed onto the stack near the stack + // protector if one exists. Determine here if this object is a suitable + // candidate. I.e., it would trigger the creation of a stack protector. + bool MayNeedSP = + (AI->isArrayAllocation() || + (TySize >= 8 && isa<ArrayType>(Ty) && + cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, + MayNeedSP, AI); + } + + for (; BB != EB; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + // Mark values used outside their block as exported, by allocating + // a virtual register for them. + if (isUsedOutsideOfDefiningBlock(I)) + if (!isa<AllocaInst>(I) || + !StaticAllocaMap.count(cast<AllocaInst>(I))) + InitializeRegForValue(I); + + // Collect llvm.dbg.declare information. This is done now instead of + // during the initial isel pass through the IR so that it is done + // in a predictable order. + if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { + MachineModuleInfo &MMI = MF->getMMI(); + if (MMI.hasDebugInfo() && + DIVariable(DI->getVariable()).Verify() && + !DI->getDebugLoc().isUnknown()) { + // Don't handle byval struct arguments or VLAs, for example. + // Non-byval arguments are handled here (they refer to the stack + // temporary alloca at this point). + const Value *Address = DI->getAddress(); + if (Address) { + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { + DenseMap<const AllocaInst *, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI != StaticAllocaMap.end()) { // Check for VLAs. 
+ int FI = SI->second; + MMI.setVariableDbgInfo(DI->getVariable(), + FI, DI->getDebugLoc()); + } + } + } + } + } + } + + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This + // also creates the initial PHI MachineInstrs, though none of the input + // operands are populated. + for (BB = Fn->begin(); BB != EB; ++BB) { + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); + MBBMap[BB] = MBB; + MF->push_back(MBB); + + // Transfer the address-taken flag. This is necessary because there could + // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only + // the first one should be marked. + if (BB->hasAddressTaken()) + MBB->setHasAddressTaken(); + + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as + // appropriate. + for (BasicBlock::const_iterator I = BB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (PN->use_empty()) continue; + + // Skip empty types + if (PN->getType()->isEmptyTy()) + continue; + + DebugLoc DL = PN->getDebugLoc(); + unsigned PHIReg = ValueMap[PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + for (unsigned i = 0; i != NumRegisters; ++i) + BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); + PHIReg += NumRegisters; + } + } + } + + // Mark landing pad blocks. + for (BB = Fn->begin(); BB != EB; ++BB) + if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) + MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); +} + +/// clear - Clear out all the function-specific state. This returns this +/// FunctionLoweringInfo to an empty state, ready to be used for a +/// different function. +void FunctionLoweringInfo::clear() { + assert(CatchInfoFound.size() == CatchInfoLost.size() && + "Not all catch info was assigned to a landing pad!"); + + MBBMap.clear(); + ValueMap.clear(); + StaticAllocaMap.clear(); +#ifndef NDEBUG + CatchInfoLost.clear(); + CatchInfoFound.clear(); +#endif + LiveOutRegInfo.clear(); + VisitedBBs.clear(); + ArgDbgValues.clear(); + ByValArgFrameIndexMap.clear(); + RegFixups.clear(); +} + +/// CreateReg - Allocate a single virtual register for the given type. +unsigned FunctionLoweringInfo::CreateReg(MVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +/// CreateRegs - Allocate the appropriate number of virtual registers of +/// the correctly promoted or expanded types. Assign these registers +/// consecutive vreg numbers and return the first assigned number. +/// +/// In the case that the given value has struct or array type, this function +/// will assign registers for each member or element. 
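+/// As a hypothetical example, a value of type {i64, i32} on a target with +/// 32-bit registers would receive three consecutive vregs: two for the +/// expanded i64 member and one for the i32 member.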
+/// +unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, Ty, ValueVTs); + + unsigned FirstReg = 0; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + MVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); + + unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) { + unsigned R = CreateReg(RegisterVT); + if (!FirstReg) FirstReg = R; + } + } + return FirstReg; +} + +/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the +/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If +/// the register's LiveOutInfo is for a smaller bit width, it is extended to +/// the larger bit width by zero extension. The bit width must be no smaller +/// than the LiveOutInfo's existing bit width. +const FunctionLoweringInfo::LiveOutInfo * +FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { + if (!LiveOutRegInfo.inBounds(Reg)) + return NULL; + + LiveOutInfo *LOI = &LiveOutRegInfo[Reg]; + if (!LOI->IsValid) + return NULL; + + if (BitWidth > LOI->KnownZero.getBitWidth()) { + LOI->NumSignBits = 1; + LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth); + LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth); + } + + return LOI; +} + +/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination +/// register based on the LiveOutInfo of its operands. +void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { + Type *Ty = PN->getType(); + if (!Ty->isIntegerTy() || Ty->isVectorTy()) + return; + + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(TLI, Ty, ValueVTs); + assert(ValueVTs.size() == 1 && + "PHIs with non-vector integer types should have a single VT."); + EVT IntVT = ValueVTs[0]; + + if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1) + return; + IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT); + unsigned BitWidth = IntVT.getSizeInBits(); + + unsigned DestReg = ValueMap[PN]; + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + return; + LiveOutRegInfo.grow(DestReg); + LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; + + Value *V = PN->getIncomingValue(0); + if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) { + DestLOI.NumSignBits = 1; + APInt Zero(BitWidth, 0); + DestLOI.KnownZero = Zero; + DestLOI.KnownOne = Zero; + return; + } + + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + APInt Val = CI->getValue().zextOrTrunc(BitWidth); + DestLOI.NumSignBits = Val.getNumSignBits(); + DestLOI.KnownZero = ~Val; + DestLOI.KnownOne = Val; + } else { + assert(ValueMap.count(V) && "V should have been placed in ValueMap when its " + "CopyToReg node was created."); + unsigned SrcReg = ValueMap[V]; + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DestLOI.IsValid = false; + return; + } + const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth); + if (!SrcLOI) { + DestLOI.IsValid = false; + return; + } + DestLOI = *SrcLOI; + } + + assert(DestLOI.KnownZero.getBitWidth() == BitWidth && + DestLOI.KnownOne.getBitWidth() == BitWidth && + "Masks should have the same bit width as the type."); + + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) { + DestLOI.NumSignBits = 1; + APInt Zero(BitWidth, 0); + DestLOI.KnownZero = Zero; + DestLOI.KnownOne = Zero; + return; + } + + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + APInt Val
= CI->getValue().zextOrTrunc(BitWidth); + DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits()); + DestLOI.KnownZero &= ~Val; + DestLOI.KnownOne &= Val; + continue; + } + + assert(ValueMap.count(V) && "V should have been placed in ValueMap when " + "its CopyToReg node was created."); + unsigned SrcReg = ValueMap[V]; + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DestLOI.IsValid = false; + return; + } + const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth); + if (!SrcLOI) { + DestLOI.IsValid = false; + return; + } + DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits); + DestLOI.KnownZero &= SrcLOI->KnownZero; + DestLOI.KnownOne &= SrcLOI->KnownOne; + } +} + +/// setArgumentFrameIndex - Record frame index for the byval +/// argument. This overrides previous frame index entry for this argument, +/// if any. +void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A, + int FI) { + ByValArgFrameIndexMap[A] = FI; +} + +/// getArgumentFrameIndex - Get frame index for the byval argument. +/// If the argument does not have any assigned frame index then 0 is +/// returned. +int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { + DenseMap<const Argument *, int>::iterator I = + ByValArgFrameIndexMap.find(A); + if (I != ByValArgFrameIndexMap.end()) + return I->second; + DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); + return 0; +} + +/// ComputeUsesVAFloatArgument - Determine if any floating-point values are +/// being passed to this variadic function, and set the MachineModuleInfo's +/// usesVAFloatArgument flag if so. This flag is used to emit an undefined +/// reference to _fltused on Windows, which will link in MSVCRT's +/// floating-point support. +void llvm::ComputeUsesVAFloatArgument(const CallInst &I, + MachineModuleInfo *MMI) +{ + FunctionType *FT = cast<FunctionType>( + I.getCalledValue()->getType()->getContainedType(0)); + if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + Type* T = I.getArgOperand(i)->getType(); + for (po_iterator<Type*> i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (i->isFloatingPointTy()) { + MMI->setUsesVAFloatArgument(true); + return; + } + } + } + } +} + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB) { + // Inform the MachineModuleInfo of the personality for this landing pad. + const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1)); + assert(CE->getOpcode() == Instruction::BitCast && + isa<Function>(CE->getOperand(0)) && + "Personality should be a function"); + MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0))); + + // Gather all the type infos for this landing pad and pass them along to + // MachineModuleInfo. 
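+ // Roughly (a sketch of the encoding): scanning the operands from the end, a + // ConstantInt N marks a filter whose N-1 typeinfos follow it (N == 0 marks a + // cleanup), while bare typeinfo operands are catch clauses.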
+ std::vector<const GlobalVariable *> TyInfo; + unsigned N = I.getNumArgOperands(); + + for (unsigned i = N - 1; i > 1; --i) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) { + unsigned FilterLength = CI->getZExtValue(); + unsigned FirstCatch = i + FilterLength + !FilterLength; + assert(FirstCatch <= N && "Invalid filter length"); + + if (FirstCatch < N) { + TyInfo.reserve(N - FirstCatch); + for (unsigned j = FirstCatch; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + if (!FilterLength) { + // Cleanup. + MMI->addCleanup(MBB); + } else { + // Filter. + TyInfo.reserve(FilterLength - 1); + for (unsigned j = i + 1; j < FirstCatch; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); + MMI->addFilterTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + N = i; + } + } + + if (N > 2) { + TyInfo.reserve(N - 2); + for (unsigned j = 2; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + } +} + +/// AddLandingPadInfo - Extract the exception handling information from the +/// landingpad instruction and add them to the specified machine module info. +void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, + MachineBasicBlock *MBB) { + MMI.addPersonality(MBB, + cast<Function>(I.getPersonalityFn()->stripPointerCasts())); + + if (I.isCleanup()) + MMI.addCleanup(MBB); + + // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct, + // but we need to do it this way because of how the DWARF EH emitter + // processes the clauses. + for (unsigned i = I.getNumClauses(); i != 0; --i) { + Value *Val = I.getClause(i - 1); + if (I.isCatch(i - 1)) { + MMI.addCatchTypeInfo(MBB, + dyn_cast<GlobalVariable>(Val->stripPointerCasts())); + } else { + // Add filters in a list. + Constant *CVal = cast<Constant>(Val); + SmallVector<const GlobalVariable*, 4> FilterList; + for (User::op_iterator + II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II) + FilterList.push_back(cast<GlobalVariable>((*II)->stripPointerCasts())); + + MMI.addFilterTypeInfo(MBB, FilterList); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp new file mode 100644 index 0000000..3b1abd7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -0,0 +1,982 @@ +//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instr-emitter" +#include "InstrEmitter.h" +#include "SDNodeDbgValue.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +/// MinRCSize - Smallest register class we allow when constraining virtual +/// registers. If satisfying all register class constraints would require +/// using a smaller register class, emit a COPY to a new virtual register +/// instead. +const unsigned MinRCSize = 4; + +/// CountResults - The results of target nodes have register or immediate +/// operands first, then an optional chain, and optional glue operands (which do +/// not go into the resulting MachineInstr). +unsigned InstrEmitter::CountResults(SDNode *Node) { + unsigned N = Node->getNumValues(); + while (N && Node->getValueType(N - 1) == MVT::Glue) + --N; + if (N && Node->getValueType(N - 1) == MVT::Other) + --N; // Skip over chain result. + return N; +} + +/// countOperands - The inputs to target nodes have any actual inputs first, +/// followed by an optional chain operand, then an optional glue operand. +/// Compute the number of actual operands that will go into the resulting +/// MachineInstr. +/// +/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding +/// the chain and glue. These operands may be implicit on the machine instr. +static unsigned countOperands(SDNode *Node, unsigned NumExpUses, + unsigned &NumImpUses) { + unsigned N = Node->getNumOperands(); + while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) + --N; + if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) + --N; // Ignore chain if it exists. + + // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. + NumImpUses = N - NumExpUses; + for (unsigned I = N; I > NumExpUses; --I) { + if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) + continue; + if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) + if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) + continue; + NumImpUses = N - I; + break; + } + + return N; +} + +/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an +/// implicit physical register output. +void InstrEmitter:: +EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, + unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VRBase = 0; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Just use the input register directly! + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + return; + } + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + bool MatchReg = true; + const TargetRegisterClass *UseRC = NULL; + MVT VT = Node->getSimpleValueType(ResNo); + + // Stick to the preferred register classes for legal types.
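+ // For example (hypothetically), an i32 result on x86 would seed UseRC with + // the GR32 class; for an illegal type UseRC stays null and is inferred from + // the uses scanned below.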
+ if (TLI->isTypeLegal(VT)) + UseRC = TLI->getRegClassFor(VT); + + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + MVT VT = Node->getSimpleValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Glue) + continue; + Match = false; + if (User->isMachineOpcode()) { + const MCInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = 0; + if (i+II.getNumDefs() < II.getNumOperands()) { + RC = TRI->getAllocatableClass( + TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF)); + } + if (!UseRC) + UseRC = RC; + else if (RC) { + const TargetRegisterClass *ComRC = + TRI->getCommonSubClass(UseRC, RC); + // If multiple uses expect disjoint register classes, we emit + // copies in AddRegisterOperand. + if (ComRC) + UseRC = ComRC; + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI->getRegClass(VRBase); + } else if (UseRC) { + assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. + VRBase = MRI->createVirtualRegister(DstRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + VRBase).addReg(SrcReg); + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// getDstOfOnlyCopyToRegUse - If the only use of the specified result number of +/// the node is a CopyToReg, return its destination register. Return 0 otherwise.
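+/// For illustration (hypothetical DAG): if result 0 of a node is consumed +/// solely by CopyToReg(ch, %vreg7, node:0), this returns %vreg7.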
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const { + if (!Node->hasOneUse()) + return 0; + + SDNode *User = *Node->use_begin(); + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return Reg; + } + return 0; +} + +void InstrEmitter::CreateVirtualRegisters(SDNode *Node, + MachineInstrBuilder &MIB, + const MCInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = + TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + if (II.OpInfo[i].isOptionalDef()) { + // Optional def must be a physical register. + unsigned NumResults = CountResults(Node); + VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + MIB.addReg(VRBase, RegState::Define); + } + + if (!VRBase && !IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MIB.addReg(VRBase, RegState::Define); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. + if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI->createVirtualRegister(RC); + MIB.addReg(VRBase, RegState::Define); + } + + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned InstrEmitter::getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. + unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); + // IMPLICIT_DEF can produce any type of result so its MCInstrDesc + // does not include operand register class info. 
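+ // Fall back to the natural register class for the value type, e.g. an undef + // f64 would (hypothetically) land in the target's 64-bit FP register class.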
+ if (!VReg) { + const TargetRegisterClass *RC = + TLI->getRegClassFor(Op.getSimpleValueType()); + VReg = MRI->createVirtualRegister(RC); + } + BuildMI(*MBB, InsertPos, Op.getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), VReg); + return VReg; + } + + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. Insert register copies if the register is +/// not in the required register class. +void +InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + + const MCInstrDesc &MCID = MIB->getDesc(); + bool isOptDef = IIOpNum < MCID.getNumOperands() && + MCID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it, but first attempt to + // shrink VReg's register class within reason. For example, if VReg == GR32 + // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP. + if (II) { + const TargetRegisterClass *DstRC = 0; + if (IIOpNum < II->getNumOperands()) + DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); + if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + VReg = NewVReg; + } + } + + // If this value has only one use, that use is a kill. This is a + // conservative approximation. InstrEmitter does trivial coalescing + // with CopyFromReg nodes, so don't emit kill flags for them. + // Avoid kill flags on Schedule cloned nodes, since there will be + // multiple uses. + // Tied operands are never killed, so we need to check that. And that + // means we need to determine the index of the operand. + bool isKill = Op.hasOneUse() && + Op.getNode()->getOpcode() != ISD::CopyFromReg && + !IsDebug && + !(IsClone || IsCloned); + if (isKill) { + unsigned Idx = MIB->getNumOperands(); + while (Idx > 0 && + MIB->getOperand(Idx-1).isReg() && + MIB->getOperand(Idx-1).isImplicit()) + --Idx; + bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1; + if (isTied) + isKill = false; + } + + MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) | + getDebugRegState(IsDebug)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. 
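+/// Non-register operands are translated directly (constants become +/// immediates, frame indices become frame-index operands, and so on); any +/// other value operand goes through AddRegisterOperand.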
+void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned) { + if (Op.isMachineOpcode()) { + AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, + IsDebug, IsClone, IsCloned); + } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + MIB.addImm(C->getSExtValue()); + } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { + MIB.addFPImm(F->getConstantFPValue()); + } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { + // Turn additional physreg operands into implicit uses on non-variadic + // instructions. This is used by call and return instructions passing + // arguments in registers. + bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); + MIB.addReg(R->getReg(), getImplRegState(Imp)); + } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { + MIB.addRegMask(RM->getRegMask()); + } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { + MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(), + TGA->getTargetFlags()); + } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) { + MIB.addMBB(BBNode->getBasicBlock()); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { + MIB.addFrameIndex(FI->getIndex()); + } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) { + MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags()); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { + int Offset = CP->getOffset(); + unsigned Align = CP->getAlignment(); + Type *Type = CP->getType(); + // MachineConstantPool wants an explicit alignment. + if (Align == 0) { + Align = TM->getDataLayout()->getPrefTypeAlignment(Type); + if (Align == 0) { + // Alignment of vector types. FIXME! + Align = TM->getDataLayout()->getTypeAllocSize(Type); + } + } + + unsigned Idx; + MachineConstantPool *MCP = MF->getConstantPool(); + if (CP->isMachineConstantPoolEntry()) + Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); + else + Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); + MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags()); + } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { + MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags()); + } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { + MIB.addBlockAddress(BA->getBlockAddress(), + BA->getOffset(), + BA->getTargetFlags()); + } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { + MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags()); + } else { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); + AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, + IsDebug, IsClone, IsCloned); + } +} + +unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, + MVT VT, DebugLoc DL) { + const TargetRegisterClass *VRC = MRI->getRegClass(VReg); + const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); + + // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg + // within reason. + if (RC && RC != VRC) + RC = MRI->constrainRegClass(VReg, RC, MinRCSize); + + // VReg has been adjusted. It can be used with SubIdx operands now. + if (RC) + return VReg; + + // VReg couldn't be reasonably constrained. 
Emit a COPY to a new virtual + // register instead. + RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx); + assert(RC && "No legal register class for VT supports that SubIdx"); + unsigned NewReg = MRI->createVirtualRegister(RC); + BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) + .addReg(VReg); + return NewReg; +} + +/// EmitSubregNode - Generate machine code for subreg nodes. +/// +void InstrEmitter::EmitSubregNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned) { + unsigned VRBase = 0; + unsigned Opc = Node->getMachineOpcode(); + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + if (Opc == TargetOpcode::EXTRACT_SUBREG) { + // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no + // constraints on the %dst register, COPY can target all legal register + // classes. + unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *TRC = + TLI->getRegClassFor(Node->getSimpleValueType(0)); + + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + MachineInstr *DefMI = MRI->getVRegDef(VReg); + unsigned SrcReg, DstReg, DefSubIdx; + if (DefMI && + TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && + SubIdx == DefSubIdx && + TRC == MRI->getRegClass(SrcReg)) { + // Optimize these: + // r1025 = s/zext r1024, 4 + // r1026 = extract_subreg r1025, 4 + // to a copy + // r1026 = copy r1024 + VRBase = MRI->createVirtualRegister(TRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); + MRI->clearKillFlags(SrcReg); + } else { + // VReg may not support a SubIdx sub-register, and we may need to + // constrain its register class or issue a COPY to a compatible register + // class. + VReg = ConstrainForSubReg(VReg, SubIdx, + Node->getOperand(0).getSimpleValueType(), + Node->getDebugLoc()); + + // Create the destreg if it is missing. + if (VRBase == 0) + VRBase = MRI->createVirtualRegister(TRC); + + // Create the extract_subreg machine instruction. + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); + } + } else if (Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); + + // Figure out the register class to create for the destreg. It should be + // the largest legal register class supporting SubIdx sub-registers. + // RegisterCoalescer will constrain it further if it decides to eliminate + // the INSERT_SUBREG instruction. + // + // %dst = INSERT_SUBREG %src, %sub, SubIdx + // + // is lowered by TwoAddressInstructionPass to: + // + // %dst = COPY %src + // %dst:SubIdx = COPY %sub + // + // There is no constraint on the %src register class. 
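+ // SUBREG_TO_REG is handled by the same path, except that its first input is + // an immediate rather than a register; as a sketch, + // + // %dst = SUBREG_TO_REG 0, %sub, SubIdx + // + // additionally asserts that the bits outside SubIdx are zero.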
+ // + const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); + SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); + assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); + + if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase))) + VRBase = MRI->createVirtualRegister(SRC); + + // Create the insert_subreg or subreg_to_reg machine instruction. + MachineInstrBuilder MIB = + BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase); + + // If creating a subreg_to_reg, then the first input operand + // is an implicit value immediate, otherwise it's a register. + if (Opc == TargetOpcode::SUBREG_TO_REG) { + const ConstantSDNode *SD = cast<ConstantSDNode>(N0); + MIB.addImm(SD->getZExtValue()); + } else + AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + // Add the subregister being inserted + AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + MIB.addImm(SubIdx); + MBB->insert(InsertPos, MIB); + } else + llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. +/// COPY_TO_REGCLASS is just a normal copy, except that the destination +/// register is constrained to be in a particular register class. +/// +void +InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + + // Create the new VReg in the destination class and emit a copy. + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *DstRC = + TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + NewVReg).addReg(VReg); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. +/// +void InstrEmitter::EmitRegSequence(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned) { + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); + const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); + unsigned NumOps = Node->getNumOperands(); + assert((NumOps & 1) == 1 && + "REG_SEQUENCE must have an odd number of operands!"); + for (unsigned i = 1; i != NumOps; ++i) { + SDValue Op = Node->getOperand(i); + if ((i & 1) == 0) { + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); + // Skip physical registers as they don't have a vreg to get and we'll + // insert copies for them in TwoAddressInstructionPass anyway.
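+ // Operand layout, as a sketch: + // + // REG_SEQUENCE RCIdImm, %val0, subidx0, %val1, subidx1, ... + // + // Values sit at odd operand positions; the even position that follows each + // value holds the sub-register index it is inserted at.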
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); + if (SRC && SRC != RC) { + MRI->setRegClass(NewVReg, SRC); + RC = SRC; + } + } + } + AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + } + + MBB->insert(InsertPos, MIB); + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitDbgValue - Generate machine instruction for a dbg_value node. +/// +MachineInstr * +InstrEmitter::EmitDbgValue(SDDbgValue *SD, + DenseMap<SDValue, unsigned> &VRBaseMap) { + uint64_t Offset = SD->getOffset(); + MDNode* MDPtr = SD->getMDPtr(); + DebugLoc DL = SD->getDebugLoc(); + + if (SD->getKind() == SDDbgValue::FRAMEIX) { + // Stack address; this needs to be lowered in target-dependent fashion. + // EmitTargetCodeForFrameDebugValue is responsible for allocation. + unsigned FrameIx = SD->getFrameIx(); + return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + } + // Otherwise, we're going to create an instruction here. + const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + MachineInstrBuilder MIB = BuildMI(*MF, DL, II); + if (SD->getKind() == SDDbgValue::SDNODE) { + SDNode *Node = SD->getSDNode(); + SDValue Op = SDValue(Node, SD->getResNo()); + // It's possible we replaced this SDNode with other(s) and therefore + // didn't generate code for it. It's better to catch these cases where + // they happen and transfer the debug info, but trying to guarantee that + // in all cases would be very fragile; this is a safeguard for any + // that were missed. + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + if (I==VRBaseMap.end()) + MIB.addReg(0U); // undef + else + AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, + /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); + } else if (SD->getKind() == SDDbgValue::CONST) { + const Value *V = SD->getConst(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getBitWidth() > 64) + MIB.addCImm(CI); + else + MIB.addImm(CI->getSExtValue()); + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + MIB.addFPImm(CF); + } else { + // Could be an Undef. In any case insert an Undef so we can see what we + // dropped. + MIB.addReg(0U); + } + } else { + // Insert an Undef so we can see what we dropped. + MIB.addReg(0U); + } + + MIB.addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + +/// EmitMachineNode - Generate machine code for a target-specific node and +/// needed dependencies. +/// +void InstrEmitter:: +EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned Opc = Node->getMachineOpcode(); + + // Handle subreg insert/extract specially + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { + EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); + return; + } + + // Handle COPY_TO_REGCLASS specially. + if (Opc == TargetOpcode::COPY_TO_REGCLASS) { + EmitCopyToRegClassNode(Node, VRBaseMap); + return; + } + + // Handle REG_SEQUENCE specially. 
+ if (Opc == TargetOpcode::REG_SEQUENCE) { + EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned); + return; + } + + if (Opc == TargetOpcode::IMPLICIT_DEF) + // We want a unique VR for each IMPLICIT_DEF use. + return; + + const MCInstrDesc &II = TII->get(Opc); + unsigned NumResults = CountResults(Node); + unsigned NumImpUses = 0; + unsigned NodeOperands = + countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); + bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; +#ifndef NDEBUG + unsigned NumMIOperands = NodeOperands + NumResults; + if (II.isVariadic()) + assert(NumMIOperands >= II.getNumOperands() && + "Too few operands for a variadic node!"); + else + assert(NumMIOperands >= II.getNumOperands() && + NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + + NumImpUses && + "#operands for dag node doesn't match .td file!"); +#endif + + // Create the new machine instruction. + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II); + + // Add result register values for things that are defined by this + // instruction. + if (NumResults) + CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap); + + // Emit all of the actual operands of this instruction, adding them to the + // instruction as appropriate. + bool HasOptPRefs = II.getNumDefs() > NumResults; + assert((!HasOptPRefs || !HasPhysRegOuts) && + "Unable to cope with optional defs and phys regs defs!"); + unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; + for (unsigned i = NumSkip; i != NodeOperands; ++i) + AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + + // Transfer all of the memory reference descriptions of this instruction. + MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), + cast<MachineSDNode>(Node)->memoperands_end()); + + // Insert the instruction into position in the block. This needs to + // happen before any custom inserter hook is called so that the + // hook knows where in the block to insert the replacement code. + MBB->insert(InsertPos, MIB); + + // The MachineInstr may also define physregs instead of virtregs. These + // physreg values can reach other instructions in different ways: + // + // 1. When there is a use of a Node value beyond the explicitly defined + // virtual registers, we emit a CopyFromReg for one of the implicitly + // defined physregs. This only happens when HasPhysRegOuts is true. + // + // 2. A CopyFromReg reading a physreg may be glued to this instruction. + // + // 3. A glued instruction may implicitly use a physreg. + // + // 4. A glued instruction may use a RegisterSDNode operand. + // + // Collect all the used physreg defs, and make sure that any unused physreg + // defs are marked as dead. + SmallVector<unsigned, 8> UsedRegs; + + // Additional results must be physical register defs. + if (HasPhysRegOuts) { + for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + if (!Node->hasAnyUseOfValue(i)) + continue; + // This implicitly defined physreg has a use. + UsedRegs.push_back(Reg); + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + } + } + + // Scan the glue chain for any used physregs. 
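+ // A call is the typical producer of such a chain; as a hypothetical sketch, + // a CALL node may be glued to a CopyFromReg reading the physical return + // register. Every glued CopyFromReg, declared implicit use, and + // RegisterSDNode operand pins a physreg that must not be marked dead here.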
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { + if (F->getOpcode() == ISD::CopyFromReg) { + UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); + continue; + } else if (F->getOpcode() == ISD::CopyToReg) { + // Skip CopyToReg nodes that are internal to the glue chain. + continue; + } + // Collect declared implicit uses. + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. + for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } + } + } + + // Finally mark unused registers as dead. + if (!UsedRegs.empty() || II.getImplicitDefs()) + MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); + + // Run post-isel target hook to adjust this instruction if needed. +#ifdef NDEBUG + if (II.hasPostISelHook()) +#endif + TLI->AdjustInstrPostInstrSelection(MIB, Node); +} + +/// EmitSpecialNode - Generate machine code for a target-independent node and +/// needed dependencies. +void InstrEmitter:: +EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(); +#endif + llvm_unreachable("This target-independent node should have been selected!"); + case ISD::EntryToken: + llvm_unreachable("EntryToken should have been excluded from the schedule!"); + case ISD::MERGE_VALUES: + case ISD::TokenFactor: // fall thru + break; + case ISD::CopyToReg: { + unsigned SrcReg; + SDValue SrcVal = Node->getOperand(2); + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) + SrcReg = R->getReg(); + else + SrcReg = getVR(SrcVal, VRBaseMap); + + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + if (SrcReg == DestReg) // Coalesced away the copy? Ignore. + break; + + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + DestReg).addReg(SrcReg); + break; + } + case ISD::CopyFromReg: { + unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + break; + } + case ISD::EH_LABEL: { + MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel(); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::EH_LABEL)).addSym(S); + break; + } + + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: { + unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ? + TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; + + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1)); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) + .addFrameIndex(FI->getIndex()); + break; + } + + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + // Create the inline asm machine instruction. + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::INLINEASM)); + + // Add the asm string as an external symbol operand. 
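+ // The INLINEASM node is laid out roughly as (sketch): + // + // [chain] [asm string] [md node] [extra-info imm] + // ([group-flags imm] [operand]*)* [optional glue] + // + // and the MachineInstr is assembled by walking these operand groups.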
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); + const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); + MIB.addExternalSymbol(AsmStr); + + // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore + // bits. + int64_t ExtraInfo = + cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))-> + getZExtValue(); + MIB.addImm(ExtraInfo); + + // Remember the operand index of the group flags. + SmallVector<unsigned, 8> GroupIdx; + + // Add all of the operand registers to the instruction. + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + GroupIdx.push_back(MIB->getNumOperands()); + MIB.addImm(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegDef: + for (unsigned j = 0; j != NumVals; ++j, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + // FIXME: Add dead flags for physical and virtual registers defined. + // For now, mark physical register defs as implicit to help fast + // regalloc. This makes inline asm look a lot like calls. + MIB.addReg(Reg, RegState::Define | + getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + } + break; + case InlineAsm::Kind_RegDefEarlyClobber: + case InlineAsm::Kind_Clobber: + for (unsigned j = 0; j != NumVals; ++j, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | + getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + } + break; + case InlineAsm::Kind_RegUse: // Use of register. + case InlineAsm::Kind_Imm: // Immediate. + case InlineAsm::Kind_Mem: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (unsigned j = 0; j != NumVals; ++j, ++i) + AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap, + /*IsDebug=*/false, IsClone, IsCloned); + + // Manually set isTied bits. + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) { + unsigned DefGroup = 0; + if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) { + unsigned DefIdx = GroupIdx[DefGroup] + 1; + unsigned UseIdx = GroupIdx.back() + 1; + for (unsigned j = 0; j != NumVals; ++j) + MIB->tieOperands(DefIdx + j, UseIdx + j); + } + } + break; + } + } + + // Get the mdnode from the asm if it exists and add it to the instruction. + SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); + const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); + if (MD) + MIB.addMetadata(MD); + + MBB->insert(InsertPos, MIB); + break; + } + } +} + +/// InstrEmitter - Construct an InstrEmitter and set it to start inserting +/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, + MachineBasicBlock::iterator insertpos) + : MF(mbb->getParent()), + MRI(&MF->getRegInfo()), + TM(&MF->getTarget()), + TII(TM->getInstrInfo()), + TRI(TM->getRegisterInfo()), + TLI(TM->getTargetLowering()), + MBB(mbb), InsertPos(insertpos) { +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h new file mode 100644 index 0000000..a9c2203 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -0,0 +1,146 @@ +//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. +// +//===----------------------------------------------------------------------===// + +#ifndef INSTREMITTER_H +#define INSTREMITTER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/SelectionDAG.h" + +namespace llvm { + +class MachineInstrBuilder; +class MCInstrDesc; +class SDDbgValue; + +class InstrEmitter { + MachineFunction *MF; + MachineRegisterInfo *MRI; + const TargetMachine *TM; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + + MachineBasicBlock *MBB; + MachineBasicBlock::iterator InsertPos; + + /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an + /// implicit physical register output. + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, + bool IsClone, bool IsCloned, + unsigned SrcReg, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getDstOfOnlyCopyToRegUse - If the only use of the specified result number + /// of the node is a CopyToReg, return its destination register. Return 0 otherwise. + unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const; + + void CreateVirtualRegisters(SDNode *Node, + MachineInstrBuilder &MIB, + const MCInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getVR - Return the virtual register corresponding to the specified result + /// of the specified node. + unsigned getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// AddRegisterOperand - Add the specified register as an operand to the + /// specified machine instr. Insert register copies if the register is + /// not in the required register class. + void AddRegisterOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned); + + /// AddOperand - Add the specified operand to the specified machine instr. II + /// specifies the instruction information for the node, and IIOpNum is the + /// operand number (in the II) that we are adding. IIOpNum and II are used for + /// assertions only. + void AddOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned); + + /// ConstrainForSubReg - Try to constrain VReg to a register class that + /// supports SubIdx sub-registers. Emit a copy if that isn't possible. + /// Return the virtual register to use.
+ unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, + MVT VT, DebugLoc DL); + + /// EmitSubregNode - Generate machine code for subreg nodes. + /// + void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned); + + /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. + /// COPY_TO_REGCLASS is just a normal copy, except that the destination + /// register is constrained to be in a particular register class. + /// + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. + /// + void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned); +public: + /// CountResults - The results of target nodes have register or immediate + /// operands first, then an optional chain, and optional flag operands + /// (which do not go into the machine instrs.) + static unsigned CountResults(SDNode *Node); + + /// EmitDbgValue - Generate machine instruction for a dbg_value node. + /// + MachineInstr *EmitDbgValue(SDDbgValue *SD, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitNode - Generate machine code for a node and needed dependencies. + /// + void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Node->isMachineOpcode()) + EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); + else + EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap); + } + + /// getBlock - Return the current basic block. + MachineBasicBlock *getBlock() { return MBB; } + + /// getInsertPos - Return the current insertion position. + MachineBasicBlock::iterator getInsertPos() { return InsertPos; } + + /// InstrEmitter - Construct an InstrEmitter and set it to start inserting + /// at the given position in the given block. + InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos); + +private: + void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); +}; + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp new file mode 100644 index 0000000..51cc254 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -0,0 +1,3924 @@ +//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::Legalize method. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of
+/// idioms as part of its processing. For example, if a target does not
+/// support a 'setcc' instruction efficiently, but does support the 'brcc'
+/// instruction, this will attempt to merge setcc and br instructions into
+/// brcc's.
+///
+namespace {
+class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
+  const TargetMachine &TM;
+  const TargetLowering &TLI;
+  SelectionDAG &DAG;
+
+  /// LegalizePosition - The iterator for walking through the node list.
+  SelectionDAG::allnodes_iterator LegalizePosition;
+
+  /// LegalizedNodes - The set of nodes which have already been legalized.
+  SmallPtrSet<SDNode *, 16> LegalizedNodes;
+
+  // Libcall insertion helpers.
+
+public:
+  explicit SelectionDAGLegalize(SelectionDAG &DAG);
+
+  void LegalizeDAG();
+
+private:
+  /// LegalizeOp - Legalizes the given operation.
+  void LegalizeOp(SDNode *Node);
+
+  SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+  void LegalizeLoadOps(SDNode *Node);
+  void LegalizeStoreOps(SDNode *Node);
+
+  /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+  /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+  /// is necessary to spill the vector being inserted into to memory, perform
+  /// the insert there, and then read the result back.
+  SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+                                         SDValue Idx, DebugLoc dl);
+  SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+                                  SDValue Idx, DebugLoc dl);
+
+  /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+  /// performs the same shuffle in terms of order or result bytes, but on a
+  /// type whose vector element type is narrower than the original shuffle
+  /// type.
+  /// e.g.
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> + SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, + SDValue N1, SDValue N2, + ArrayRef<int> Mask) const; + + void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + DebugLoc dl); + + SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, + unsigned NumOps, bool isSigned, DebugLoc dl); + + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); + SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128); + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128); + void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); + SDValue ExpandBUILD_VECTOR(SDNode *Node); + SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); + void ExpandDYNAMIC_STACKALLOC(SDNode *Node, + SmallVectorImpl<SDValue> &Results); + SDValue ExpandFCOPYSIGN(SDNode *Node); + SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, + DebugLoc dl); + SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, + DebugLoc dl); + SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, + DebugLoc dl); + + SDValue ExpandBSWAP(SDValue Op, DebugLoc dl); + SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl); + + SDValue ExpandExtractFromVectorThroughStack(SDValue Op); + SDValue ExpandInsertToVectorThroughStack(SDValue Op); + SDValue ExpandVectorBuildThroughStack(SDNode* Node); + + SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); + + void ExpandNode(SDNode *Node); + void PromoteNode(SDNode *Node); + + void ForgetNode(SDNode *N) { + LegalizedNodes.erase(N); + if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) + ++LegalizePosition; + } + +public: + // DAGUpdateListener implementation. + virtual void NodeDeleted(SDNode *N, SDNode *E) { + ForgetNode(N); + } + virtual void NodeUpdated(SDNode *N) {} + + // Node replacement helpers + void ReplacedNode(SDNode *N) { + if (N->use_empty()) { + DAG.RemoveDeadNode(N); + } else { + ForgetNode(N); + } + } + void ReplaceNode(SDNode *Old, SDNode *New) { + DAG.ReplaceAllUsesWith(Old, New); + ReplacedNode(Old); + } + void ReplaceNode(SDValue Old, SDValue New) { + DAG.ReplaceAllUsesWith(Old, New); + ReplacedNode(Old.getNode()); + } + void ReplaceNode(SDNode *Old, const SDValue *New) { + DAG.ReplaceAllUsesWith(Old, New); + ReplacedNode(Old); + } +}; +} + +/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which +/// performs the same shuffe in terms of order or result bytes, but on a type +/// whose vector element type is narrower than the original shuffle type. +/// e.g. 
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> +SDValue +SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, + SDValue N1, SDValue N2, + ArrayRef<int> Mask) const { + unsigned NumMaskElts = VT.getVectorNumElements(); + unsigned NumDestElts = NVT.getVectorNumElements(); + unsigned NumEltsGrowth = NumDestElts / NumMaskElts; + + assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); + + if (NumEltsGrowth == 1) + return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]); + + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumMaskElts; ++i) { + int Idx = Mask[i]; + for (unsigned j = 0; j != NumEltsGrowth; ++j) { + if (Idx < 0) + NewMask.push_back(-1); + else + NewMask.push_back(Idx * NumEltsGrowth + j); + } + } + assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); + assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?"); + return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); +} + +SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) + : SelectionDAG::DAGUpdateListener(dag), + TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + DAG(dag) { +} + +void SelectionDAGLegalize::LegalizeDAG() { + DAG.AssignTopologicalOrder(); + + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. + for (;;) { + bool AnyLegalized = false; + for (LegalizePosition = DAG.allnodes_end(); + LegalizePosition != DAG.allnodes_begin(); ) { + --LegalizePosition; + + SDNode *N = LegalizePosition; + if (LegalizedNodes.insert(N)) { + AnyLegalized = true; + LegalizeOp(N); + } + } + if (!AnyLegalized) + break; + + } + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); +} + +/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or +/// a load from the constant pool. +SDValue +SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { + bool Extend = false; + DebugLoc dl = CFP->getDebugLoc(); + + // If a FP immediate is precise when represented as a float and if the + // target can do an extending load from float to double, we put it into + // the constant pool as a float, even if it's is statically typed as a + // double. This shrinks FP constants and canonicalizes them for targets where + // an FP extending load is the same cost as a normal load (such as on the x87 + // fp stack or PPC FP unit). + EVT VT = CFP->getValueType(0); + ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue()); + if (!UseCP) { + assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); + return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), + (VT == MVT::f64) ? MVT::i64 : MVT::i32); + } + + EVT OrigVT = VT; + EVT SVT = VT; + while (SVT != MVT::f32) { + SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); + if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && + // Only do this if the target has a native EXTLOAD instruction from + // smaller type. 
+ TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && + TLI.ShouldShrinkFPConstant(OrigVT)) { + Type *SType = SVT.getTypeForEVT(*DAG.getContext()); + LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); + VT = SVT; + Extend = true; + } + } + + SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + if (Extend) { + SDValue Result = + DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return Result; + } + SDValue Result = + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, false, + Alignment); + return Result; +} + +/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. +static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI, + SelectionDAGLegalize *DAGLegalize) { + assert(ST->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed stores not implemented!"); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDValue Val = ST->getValue(); + EVT VT = Val.getValueType(); + int Alignment = ST->getAlignment(); + DebugLoc dl = ST->getDebugLoc(); + if (ST->getMemoryVT().isFloatingPoint() || + ST->getMemoryVT().isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (TLI.isTypeLegal(intVT)) { + // Expand to a bitconvert of the value to the integer type of the + // same size, then a (misaligned) int store. + // FIXME: Does not handle truncating floating point stores! + SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; + } + // Do a (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. + EVT StoredVT = ST->getMemoryVT(); + MVT RegVT = + TLI.getRegisterType(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), + StoredVT.getSizeInBits())); + unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + + // Perform the original store, only redirected to the stack slot. + SDValue Store = DAG.getTruncStore(Chain, dl, + Val, StackPtr, MachinePointerInfo(), + StoredVT, false, false, 0); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the stack slot. + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, + MachinePointerInfo(), + false, false, false, 0); + // Store it to the final location. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo().getWithOffset(Offset), + ST->isVolatile(), ST->isNonTemporal(), + MinAlign(ST->getAlignment(), Offset))); + // Increment the pointers. 
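+      // (Offset and both pointers advance by RegBytes per iteration. As an
+      // illustration: for a 10-byte store copied through 4-byte registers,
+      // StoredBytes = 10, RegBytes = 4 and NumRegs = 3, so this loop does
+      // two full copies at offsets 0 and 4, leaving 2 bytes for the partial
+      // truncating store below.)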
+ Offset += RegBytes; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + } + + // The last store may be partial. Do a truncating store. On big-endian + // machines this requires an extending load from the stack slot to ensure + // that the bits are in the right place. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (StoredBytes - Offset)); + + // Load from the stack slot. + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo(), + MemVT, false, false, 0); + + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo() + .getWithOffset(Offset), + MemVT, ST->isVolatile(), + ST->isNonTemporal(), + MinAlign(ST->getAlignment(), Offset))); + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; + } + assert(ST->getMemoryVT().isInteger() && + !ST->getMemoryVT().isVector() && + "Unaligned store of unknown type."); + // Get the half-size VT + EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext()); + int NumBits = NewStoredVT.getSizeInBits(); + int IncrementSize = NumBits / 8; + + // Divide the stored value in two parts. + SDValue ShiftAmount = DAG.getConstant(NumBits, + TLI.getShiftAmountTy(Val.getValueType())); + SDValue Lo = Val; + SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); + + // Store the two parts + SDValue Store1, Store2; + Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, + ST->getPointerInfo(), NewStoredVT, + ST->isVolatile(), ST->isNonTemporal(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Alignment = MinAlign(Alignment, IncrementSize); + Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), + Alignment); + + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); +} + +/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. +static void +ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI, + SDValue &ValResult, SDValue &ChainResult) { + assert(LD->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed loads not implemented!"); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + EVT VT = LD->getValueType(0); + EVT LoadedVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + if (VT.isFloatingPoint() || VT.isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); + if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { + // Expand to a (misaligned) integer load of the same size, + // then bitconvert to floating point or vector. + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->isVolatile(), + LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? 
ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); + + ValResult = Result; + ChainResult = Chain; + return; + } + + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), false, false, 0)); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + } + + // The last copy may be partial. Do an extending load. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), MemVT, + false, false, 0)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + MachinePointerInfo(), LoadedVT, false, false, 0); + + // Callers expect a MERGE_VALUES node. + ValResult = Load; + ChainResult = TF; + return; + } + assert(LoadedVT.isInteger() && !LoadedVT.isVector() && + "Unaligned load of unsupported type."); + + // Compute the new VT that is half the size of the old one. This is an + // integer MVT. + unsigned NumBits = LoadedVT.getSizeInBits(); + EVT NewLoadedVT; + NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); + NumBits >>= 1; + + unsigned Alignment = LD->getAlignment(); + unsigned IncrementSize = NumBits / 8; + ISD::LoadExtType HiExtType = LD->getExtensionType(); + + // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. 
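+  // (A non-extending load cannot be reused for the hi part: it now reads a
+  // narrower memory type than its result type, so some extension is
+  // required, and zero-extension is the conservative choice.)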
+ if (HiExtType == ISD::NON_EXTLOAD) + HiExtType = ISD::ZEXTLOAD; + + // Load the value in two parts + SDValue Lo, Hi; + if (TLI.isLittleEndian()) { + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + } else { + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + } + + // aggregate the two parts + SDValue ShiftAmount = DAG.getConstant(NumBits, + TLI.getShiftAmountTy(Hi.getValueType())); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); + Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + ValResult = Result; + ChainResult = TF; +} + +/// PerformInsertVectorEltInMemory - Some target cannot handle a variable +/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it +/// is necessary to spill the vector being inserted into to memory, perform +/// the insert there, and then read the result back. +SDValue SelectionDAGLegalize:: +PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, + DebugLoc dl) { + SDValue Tmp1 = Vec; + SDValue Tmp2 = Val; + SDValue Tmp3 = Idx; + + // If the target doesn't support this, we have to spill the input vector + // to a temporary stack slot, update the element, then reload it. This is + // badness. We could also load the value into a vector register (either + // with a "move to register" or "extload into register" instruction, then + // permute it into place, if the idx is a constant and if the idx is + // supported by the target. + EVT VT = Tmp1.getValueType(); + EVT EltVT = VT.getVectorElementType(); + EVT IdxVT = Tmp3.getValueType(); + EVT PtrVT = TLI.getPointerTy(); + SDValue StackPtr = DAG.CreateStackTemporary(VT); + + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + + // Store the vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, + MachinePointerInfo::getFixedStack(SPFI), + false, false, 0); + + // Truncate or zero extend offset to target pointer type. + unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); + // Add the offset to the index. + unsigned EltSize = EltVT.getSizeInBits()/8; + Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); + SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); + // Store the scalar value. + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, + false, false, 0); + // Load the updated vector. 
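+  // The net effect, as a sketch for a <4 x i32> insert at variable index
+  // Idx:
+  //   store %vec -> slot           ; spill the whole vector
+  //   store %val -> slot + Idx*4   ; overwrite the selected element
+  //   load  slot -> result         ; reload the updated vector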
+ return DAG.getLoad(VT, dl, Ch, StackPtr, + MachinePointerInfo::getFixedStack(SPFI), false, false, + false, 0); +} + + +SDValue SelectionDAGLegalize:: +ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { + if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { + // SCALAR_TO_VECTOR requires that the type of the value being inserted + // match the element type of the vector being created, except for + // integers in which case the inserted value can be over width. + EVT EltVT = Vec.getValueType().getVectorElementType(); + if (Val.getValueType() == EltVT || + (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { + SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + Vec.getValueType(), Val); + + unsigned NumElts = Vec.getValueType().getVectorNumElements(); + // We generate a shuffle of InVec and ScVec, so the shuffle mask + // should be 0,1,2,3,4,5... with the appropriate element replaced with + // elt 0 of the RHS. + SmallVector<int, 8> ShufOps; + for (unsigned i = 0; i != NumElts; ++i) + ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); + + return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, + &ShufOps[0]); + } + } + return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); +} + +SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // FIXME: We shouldn't do this for TargetConstantFP's. + // FIXME: move this to the DAG Combiner! Note that we can't regress due + // to phase ordering between legalized code and the dag combiner. This + // probably means that we need to integrate dag combiner and legalizer + // together. + // We generally can't do this one for long doubles. + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + DebugLoc dl = ST->getDebugLoc(); + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { + if (CFP->getValueType(0) == MVT::f32 && + TLI.isTypeLegal(MVT::i32)) { + SDValue Con = DAG.getConstant(CFP->getValueAPF(). + bitcastToAPInt().zextOrTrunc(32), + MVT::i32); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + } + + if (CFP->getValueType(0) == MVT::f64) { + // If this target supports 64-bit registers, do a single 64-bit store. + if (TLI.isTypeLegal(MVT::i64)) { + SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + zextOrTrunc(64), MVT::i64); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + } + + if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { + // Otherwise, if the target supports 32-bit registers, use 2 32-bit + // stores. If the target supports neither 32- nor 64-bits, this + // xform is certainly not worth it. 
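+        // (Illustration, little-endian: storing the f64 constant 1.0, bit
+        // pattern 0x3FF0000000000000, becomes a store of 0x00000000 at Ptr
+        // and a store of 0x3FF00000 at Ptr+4.)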
+ const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(4)); + Hi = DAG.getStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } + } + } + return SDValue(0, 0); +} + +void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + DebugLoc dl = Node->getDebugLoc(); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + if (!ST->isTruncatingStore()) { + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + ReplaceNode(ST, OptStore); + return; + } + + { + SDValue Value = ST->getValue(); + MVT VT = Value.getSimpleValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); + if (ST->getAlignment() < ABIAlignment) + ExpandUnalignedStore(cast<StoreSDNode>(Node), + DAG, TLI, this); + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Promote: { + MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote stores to same size type"); + Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + return; + } + } else { + SDValue Value = ST->getValue(); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); + Value = DAG.getZeroExtendInReg(Value, dl, StVT); + SDValue Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. 
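+      // (E.g. for an i24 truncstore, RoundWidth = 1 << Log2_32(24) = 16 and
+      // ExtraWidth = 8, so the value is emitted as an i16 piece plus an i8
+      // piece; the endian-specific layouts are spelled out below.)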
+ assert(!StVT.isVector() && "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + RoundVT, + isVolatile, isNonTemporal, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + } else { + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + } + + // The order of the stores doesn't matter. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); + } else { + switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), + StVT.getSimpleVT())) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. 
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); + if (ST->getAlignment() < ABIAlignment) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Expand: + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); + + // TRUNCSTORE:i16 i32 -> STORE i16 + assert(TLI.isTypeLegal(StVT) && + "Do not know how to expand this store!"); + Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + } +} + +void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { + LoadSDNode *LD = cast<LoadSDNode>(Node); + SDValue Chain = LD->getChain(); // The chain. + SDValue Ptr = LD->getBasePtr(); // The base pointer. + SDValue Value; // The value returned by the load op. + DebugLoc dl = Node->getDebugLoc(); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) { + MVT VT = Node->getSimpleValueType(0); + SDValue RVal = SDValue(Node, 0); + SDValue RChain = SDValue(Node, 1); + + switch (TLI.getOperationAction(Node->getOpcode(), VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getDataLayout()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); + } + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(RVal, DAG); + if (Res.getNode()) { + RVal = Res; + RChain = Res.getValue(1); + } + break; + } + case TargetLowering::Promote: { + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote loads to same size type"); + + SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); + RChain = Res.getValue(1); + break; + } + } + if (RChain.getNode() != Node) { + assert(RVal.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); + ReplacedNode(Node); + } + return; + } + + EVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. 
It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Value = Result; + Chain = Ch; + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(!SrcVT.isVector() && "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), + Chain, Ptr, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. 
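+      // (This second piece sits RoundWidth/8 bytes past the first and is
+      // ExtraWidth bits wide, matching the ZEXTLOAD@+2:i8 in the sketch
+      // above.)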
+ IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } + + Chain = Ch; + } else { + bool isCustom = false; + switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: { + Value = SDValue(Node, 0); + Chain = SDValue(Node, 1); + + if (isCustom) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + Value = Res; + Chain = Res.getValue(1); + } + } else { + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getDataLayout()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Value, Chain); + } + } + } + break; + } + case TargetLowering::Expand: + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); + break; + } + + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); + + // FIXME: This does not work for vectors on most targets. Sign- and + // zero-extend operations are currently folded into extending loads, + // whether they are legal or not, and then we end up here without any + // support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + Value = ValRes; + Chain = Result.getValue(1); + break; + } + } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. 
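+  // (Result 0 is the loaded value and result 1 is the chain; if either one
+  // was replaced above, users of both are redirected before the original
+  // node is dropped.)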
+  if (Chain.getNode() != Node) {
+    assert(Value.getNode() != Node && "Load must be completely replaced");
+    DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+    ReplacedNode(Node);
+  }
+}
+
+/// LegalizeOp - Legalize the given operation, replacing it in the DAG with
+/// a legal equivalent whose operands are all legal.
+void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+  if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+    return;
+
+  for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+    assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+             TargetLowering::TypeLegal &&
+           "Unexpected illegal type!");
+
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+    assert((TLI.getTypeAction(*DAG.getContext(),
+                              Node->getOperand(i).getValueType()) ==
+              TargetLowering::TypeLegal ||
+            Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
+           "Unexpected illegal type!");
+
+  // Figure out the correct action; the way to query this varies by opcode.
+  TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+  bool SimpleFinishLegalizing = true;
+  switch (Node->getOpcode()) {
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_VOID:
+  case ISD::STACKSAVE:
+    Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+    break;
+  case ISD::VAARG:
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getValueType(0));
+    if (Action != TargetLowering::Promote)
+      Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+    break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::EXTRACT_VECTOR_ELT:
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(0).getValueType());
+    break;
+  case ISD::FP_ROUND_INREG:
+  case ISD::SIGN_EXTEND_INREG: {
+    EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+    break;
+  }
+  case ISD::ATOMIC_STORE: {
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(2).getValueType());
+    break;
+  }
+  case ISD::SELECT_CC:
+  case ISD::SETCC:
+  case ISD::BR_CC: {
+    unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+                         Node->getOpcode() == ISD::SETCC ? 2 : 1;
+    unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+    MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
+    ISD::CondCode CCCode =
+        cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+    Action = TLI.getCondCodeAction(CCCode, OpVT);
+    if (Action == TargetLowering::Legal) {
+      if (Node->getOpcode() == ISD::SELECT_CC)
+        Action = TLI.getOperationAction(Node->getOpcode(),
+                                        Node->getValueType(0));
+      else
+        Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+    }
+    break;
+  }
+  case ISD::LOAD:
+  case ISD::STORE:
+    // FIXME: Model these properly. LOAD and STORE are complicated, and
+    // STORE expects the unlegalized operand in some cases.
+    SimpleFinishLegalizing = false;
+    break;
+  case ISD::CALLSEQ_START:
+  case ISD::CALLSEQ_END:
+    // FIXME: This shouldn't be necessary. These nodes have special properties
+    // dealing with the recursive nature of legalization. Removing this
+    // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false; + break; + case ISD::EXTRACT_ELEMENT: + case ISD::FLT_ROUNDS_: + case ISD::SADDO: + case ISD::SSUBO: + case ISD::UADDO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + case ISD::FPOWI: + case ISD::MERGE_VALUES: + case ISD::EH_RETURN: + case ISD::FRAME_TO_ARGS_OFFSET: + case ISD::EH_SJLJ_SETJMP: + case ISD::EH_SJLJ_LONGJMP: + // These operations lie about being legal: when they claim to be legal, + // they should actually be expanded. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + break; + case ISD::INIT_TRAMPOLINE: + case ISD::ADJUST_TRAMPOLINE: + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + // These operations lie about being legal: when they claim to be legal, + // they should actually be custom-lowered. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Custom; + break; + case ISD::DEBUGTRAP: + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Expand) { + // replace ISD::DEBUGTRAP with ISD::TRAP + SDValue NewVal; + NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(), + Node->getOperand(0)); + ReplaceNode(Node, NewVal.getNode()); + LegalizeOp(NewVal.getNode()); + return; + } + break; + + default: + if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { + Action = TargetLowering::Legal; + } else { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + } + break; + } + + if (SimpleFinishLegalizing) { + SDNode *NewNode = Node; + switch (Node->getOpcode()) { + default: break; + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + case ISD::ROTR: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Node->getOperand(1).getValueType().isVector()) { + SDValue SAO = + DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), + Node->getOperand(1)); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), + Handle.getValue()); + } + break; + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: + case ISD::SHL_PARTS: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Node->getOperand(2).getValueType().isVector()) { + SDValue SAO = + DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), + Node->getOperand(2)); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), + Node->getOperand(1), + Handle.getValue()); + } + break; + } + + if (NewNode != Node) { + DAG.ReplaceAllUsesWith(Node, NewNode); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); + ReplacedNode(Node); + Node = NewNode; + } + switch (Action) { + case TargetLowering::Legal: + return; + case TargetLowering::Custom: { + // FIXME: The handling for custom lowering with multiple results is + // a complete mess. 
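+      // (Convention: if the target hands back a node, result i of that node
+      // replaces result i of the original; a null result means the target
+      // declined and we fall through to Expand.)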
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + SmallVector<SDValue, 8> ResultVals; + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { + if (e == 1) + ResultVals.push_back(Res); + else + ResultVals.push_back(Res.getValue(i)); + } + if (Res.getNode() != Node || Res.getResNo() != 0) { + DAG.ReplaceAllUsesWith(Node, ResultVals.data()); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); + ReplacedNode(Node); + } + return; + } + } + // FALL THROUGH + case TargetLowering::Expand: + ExpandNode(Node); + return; + case TargetLowering::Promote: + PromoteNode(Node); + return; + } + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "NODE: "; + Node->dump( &DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to legalize this operator!"); + + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: + break; + case ISD::LOAD: { + return LegalizeLoadOps(Node); + } + case ISD::STORE: { + return LegalizeStoreOps(Node); + } + } +} + +SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { + SDValue Vec = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + // Store the value to a temporary stack slot, then LOAD the returned part. + SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Add the offset to the index. + unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, Idx.getValueType())); + + if (Idx.getValueType().bitsGT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); + else + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + + if (Op.getValueType().isVector()) + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), + false, false, false, 0); + return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + MachinePointerInfo(), + Vec.getValueType().getVectorElementType(), + false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { + assert(Op.getValueType().isVector() && "Non-vector insert subvector!"); + + SDValue Vec = Op.getOperand(0); + SDValue Part = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Store the value to a temporary stack slot, then LOAD the returned part. + + SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + + // First store the whole vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + false, false, 0); + + // Then store the inserted part. + + // Add the offset to the index. 
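+  // (Byte offset = Idx * EltSize; e.g. inserting at index 2 of a vector of
+  // i32 elements addresses byte 8 of the slot.)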
+ unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, Idx.getValueType())); + + if (Idx.getValueType().bitsGT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); + else + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + StackPtr); + + // Store the subvector. + Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr, + MachinePointerInfo(), false, false, 0); + + // Finally, load the updated vector. + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, + false, false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { + // We can't handle this case efficiently. Allocate a sufficiently + // aligned object on the stack, store each element into it, then load + // the result as a vector. + // Create the stack frame object. + EVT VT = Node->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Node->getDebugLoc(); + SDValue FIPtr = DAG.CreateStackTemporary(VT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + + // Emit a store of each element to the stack slot. + SmallVector<SDValue, 8> Stores; + unsigned TypeByteSize = EltVT.getSizeInBits() / 8; + // Store (in the right endianness) the elements to memory. + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + // Ignore undef elements. + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + unsigned Offset = TypeByteSize*i; + + SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); + + // If the destination vector element type is narrower than the source + // element type, only store the bits necessary. + if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { + Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), + EltVT, false, false, 0)); + } else + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), + false, false, 0)); + } + + SDValue StoreChain; + if (!Stores.empty()) // Not all undef elements? + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + else + StoreChain = DAG.getEntryNode(); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + false, false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + + // Get the sign bit of the RHS. First obtain a value that has the same + // sign as the sign bit, i.e. negative if and only if the sign bit is 1. + SDValue SignBit; + EVT FloatVT = Tmp2.getValueType(); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); + if (TLI.isTypeLegal(IVT)) { + // Convert to an integer with the same sign bit. + SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); + } else { + // Store the float to memory, then load the sign part out as an integer. + MVT LoadTy = TLI.getPointerTy(); + // First create a temporary that is aligned for both the load and store. 
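+    // (Worked example for little-endian x87 f80 loaded through i32: the
+    // code below advances the pointer by 8 bytes and shifts the loaded
+    // integer left by 16, moving the sign bit, bit 79 of the float, up to
+    // bit 31.)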
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); + // Then store the float to it. + SDValue Ch = + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), + false, false, 0); + if (TLI.isBigEndian()) { + assert(FloatVT.isByteSized() && "Unsupported floating point type!"); + // Load out a legal integer with the same sign bit as the float. + SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), + false, false, false, 0); + } else { // Little endian + SDValue LoadPtr = StackPtr; + // The float may be wider than the integer we are going to load. Advance + // the pointer so that the loaded integer will contain the sign bit. + unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); + unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; + LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), + LoadPtr, DAG.getIntPtrConstant(ByteOffset)); + // Load a legal integer containing the sign bit. + SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), + false, false, false, 0); + // Move the sign bit to the top bit of the loaded integer. + unsigned BitShift = LoadTy.getSizeInBits() - + (FloatVT.getSizeInBits() - 8 * ByteOffset); + assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); + if (BitShift) + SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit, + DAG.getConstant(BitShift, + TLI.getShiftAmountTy(SignBit.getValueType()))); + } + } + // Now get the sign bit proper, by seeing whether the value is negative. + SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), + SignBit, DAG.getConstant(0, SignBit.getValueType()), + ISD::SETLT); + // Get the absolute value of the result. + SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); + // Select between the nabs and abs value based on the sign bit of + // the input. + return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit, + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); +} + +void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, + SmallVectorImpl<SDValue> &Results) { + unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" + " not tell us which reg is the stack pointer!"); + DebugLoc dl = Node->getDebugLoc(); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = SDValue(Node, 0); + SDValue Tmp2 = SDValue(Node, 1); + SDValue Tmp3 = Node->getOperand(2); + SDValue Chain = Tmp1.getOperand(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); + + SDValue Size = Tmp2.getOperand(1); + SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); + Chain = SP.getValue(1); + unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + if (Align > StackAlign) + SP = DAG.getNode(ISD::AND, dl, VT, SP, + DAG.getConstant(-(uint64_t)Align, VT)); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain + + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), + DAG.getIntPtrConstant(0, true), SDValue()); + + Results.push_back(Tmp1); + Results.push_back(Tmp2); +} + +/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and +/// condition code CC on the current target. 
This routine expands SETCC with
+/// illegal condition code into AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+                                                 SDValue &LHS, SDValue &RHS,
+                                                 SDValue &CC,
+                                                 DebugLoc dl) {
+  MVT OpVT = LHS.getSimpleValueType();
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+  default: llvm_unreachable("Unknown condition code action!");
+  case TargetLowering::Legal:
+    // Nothing to do.
+    break;
+  case TargetLowering::Expand: {
+    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+    ISD::CondCode InvCC = ISD::SETCC_INVALID;
+    unsigned Opc = 0;
+    switch (CCCode) {
+    default: llvm_unreachable("Don't know how to expand this condition!");
+    case ISD::SETO:
+        assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
+            == TargetLowering::Legal
+            && "If SETO is expanded, SETOEQ must be legal!");
+        CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+    case ISD::SETUO:
+        assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
+            == TargetLowering::Legal
+            && "If SETUO is expanded, SETUNE must be legal!");
+        CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+    case ISD::SETOEQ:
+    case ISD::SETOGT:
+    case ISD::SETOGE:
+    case ISD::SETOLT:
+    case ISD::SETOLE:
+    case ISD::SETONE:
+    case ISD::SETUEQ:
+    case ISD::SETUNE:
+    case ISD::SETUGT:
+    case ISD::SETUGE:
+    case ISD::SETULT:
+    case ISD::SETULE:
+        // If we are floating point, assign and break, otherwise fall through.
+        if (!OpVT.isInteger()) {
+          // We can use the 4th bit to tell if we are the unordered
+          // or ordered version of the opcode.
+          CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+          Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+          CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+          break;
+        }
+        // Fall through if this is an unsigned integer comparison.
+    case ISD::SETLE:
+    case ISD::SETGT:
+    case ISD::SETGE:
+    case ISD::SETLT:
+    case ISD::SETNE:
+    case ISD::SETEQ:
+      InvCC = ISD::getSetCCSwappedOperands(CCCode);
+      if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
+        // Swapping the operands is the only expansion we support for these
+        // condition codes; there is no other fallback.
+        llvm_unreachable("Don't know how to expand this condition!");
+      }
+      LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
+      RHS = SDValue();
+      CC = SDValue();
+      return;
+    }
+
+    SDValue SetCC1, SetCC2;
+    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+      // If this isn't the ordered or unordered operation,
+      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+      SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+      SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+    } else {
+      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+      SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+      SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+    }
+    LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+    RHS = SDValue();
+    CC = SDValue();
+    break;
+  }
+  }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack.  This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed.  It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+                                               EVT SlotVT,
+                                               EVT DestVT,
+                                               DebugLoc dl) {
+  // Create the stack frame object.
+  unsigned SrcAlign =
+    TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType().
+                                              getTypeForEVT(*DAG.getContext()));
+  SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+  int SPFI = StackPtrFI->getIndex();
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+  unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+  unsigned SlotSize = SlotVT.getSizeInBits();
+  unsigned DestSize = DestVT.getSizeInBits();
+  Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+  unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType);
+
+  // Emit a store to the stack slot.  Use a truncstore if the input value is
+  // larger than SlotVT.
+  SDValue Store;
+
+  if (SrcSize > SlotSize)
+    Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                              PtrInfo, SlotVT, false, false, SrcAlign);
+  else {
+    assert(SrcSize == SlotSize && "Invalid store");
+    Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                         PtrInfo, false, false, SrcAlign);
+  }
+
+  // Result is a load from the stack slot.
+  if (SlotSize == DestSize)
+    return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+                       false, false, false, DestAlign);
+
+  assert(SlotSize < DestSize && "Unknown extension!");
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+                        PtrInfo, SlotVT, false, false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+  DebugLoc dl = Node->getDebugLoc();
+  // Create a vector sized/aligned stack slot, store the value to element #0,
+  // then load the whole vector back out.
+  SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+  int SPFI = StackPtrFI->getIndex();
+
+  SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+                                 StackPtr,
+                                 MachinePointerInfo::getFixedStack(SPFI),
+                                 Node->getValueType(0).getVectorElementType(),
+                                 false, false, 0);
+  return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+                     MachinePointerInfo::getFixedStack(SPFI),
+                     false, false, false, 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+  unsigned NumElems = Node->getNumOperands();
+  SDValue Value1, Value2;
+  DebugLoc dl = Node->getDebugLoc();
+  EVT VT = Node->getValueType(0);
+  EVT OpVT = Node->getOperand(0).getValueType();
+  EVT EltVT = VT.getVectorElementType();
+
+  // If the only non-undef value is the low element, turn this into a
+  // SCALAR_TO_VECTOR node.  If this is { X, X, X, X }, determine X.
+  bool isOnlyLowElement = true;
+  bool MoreThanTwoValues = false;
+  bool isConstant = true;
+  for (unsigned i = 0; i < NumElems; ++i) {
+    SDValue V = Node->getOperand(i);
+    if (V.getOpcode() == ISD::UNDEF)
+      continue;
+    if (i > 0)
+      isOnlyLowElement = false;
+    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+      isConstant = false;
+
+    if (!Value1.getNode()) {
+      Value1 = V;
+    } else if (!Value2.getNode()) {
+      if (V != Value1)
+        Value2 = V;
+    } else if (V != Value1 && V != Value2) {
+      MoreThanTwoValues = true;
+    }
+  }
+
+  if (!Value1.getNode())
+    return DAG.getUNDEF(VT);
+
+  if (isOnlyLowElement)
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+  // If all elements are constants, create a load from the constant pool.
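+  // (For example, a <4 x i32> of <1, 2, 3, 4> becomes one constant-pool
+  // entry that the generated code reads back with a single vector load.)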
+ if (isConstant) { + SmallVector<Constant*, 16> CV; + for (unsigned i = 0, e = NumElems; i != e; ++i) { + if (ConstantFPSDNode *V = + dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { + CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue())); + } else if (ConstantSDNode *V = + dyn_cast<ConstantSDNode>(Node->getOperand(i))) { + if (OpVT==EltVT) + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + else { + // If OpVT and EltVT don't match, EltVT is not legal and the + // element values have been promoted/truncated earlier. Undo this; + // we don't want a v16i8 to become a v16i32 for example. + const ConstantInt *CI = V->getConstantIntValue(); + CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()), + CI->getZExtValue())); + } + } else { + assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); + Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext()); + CV.push_back(UndefValue::get(OpNTy)); + } + } + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment); + } + + if (!MoreThanTwoValues) { + SmallVector<int, 8> ShuffleVec(NumElems, -1); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + ShuffleVec[i] = V == Value1 ? 0 : NumElems; + } + if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. + SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); + SDValue Vec2; + if (Value2.getNode()) + Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); + else + Vec2 = DAG.getUNDEF(VT); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + } + } + + // Otherwise, we can't handle this case efficiently. + return ExpandVectorBuildThroughStack(Node); +} + +// ExpandLibCall - Expand a node into a call to a libcall. If the result value +// does not fit into a register, return the lo part and set the hi part to the +// by-reg argument. If it does fit into a single register, return the result +// and leave the Hi part unset. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, + bool isSigned) { + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // TLI.isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position. 
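+  // (Being "in the right position" roughly means nothing but the return
+  // uses this node's results, so the call can reuse the caller's frame;
+  // see isInTailCallPosition for the precise conditions.)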
+ SDValue TCChain = InChain; + bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain); + if (isTailCall) + InChain = TCChain; + + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), isTailCall, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + + if (!CallInfo.second.getNode()) + // It's a tailcall, return the chain (which is the DAG root). + return DAG.getRoot(); + + return CallInfo.first; +} + +/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// and returning a result of type RetVT. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); + + return CallInfo.first; +} + +// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. 
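+// (The in-chain threads memory ordering through the call; ExpandAtomic below
+// relies on this when lowering atomics to __sync_* libcalls, e.g. an i32
+// ISD::ATOMIC_SWAP becomes a call to __sync_lock_test_and_set_4.)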
+std::pair<SDValue, SDValue> +SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + return CallInfo; +} + +SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = Call_F32; break; + case MVT::f64: LC = Call_F64; break; + case MVT::f80: LC = Call_F80; break; + case MVT::f128: LC = Call_F128; break; + case MVT::ppcf128: LC = Call_PPCF128; break; + } + return ExpandLibCall(LC, Node, false); +} + +SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC = Call_I8; break; + case MVT::i16: LC = Call_I16; break; + case MVT::i32: LC = Call_I32; break; + case MVT::i64: LC = Call_I64; break; + case MVT::i128: LC = Call_I128; break; + } + return ExpandLibCall(LC, Node, isSigned); +} + +/// isDivRemLibcallAvailable - Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + return TLI.getLibcallName(LC) != 0; +} + +/// useDivRem - Only issue divrem libcall if both quotient and remainder are +/// needed. +static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { + // The other use might have been replaced with a divrem already. + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + unsigned OtherOpcode = 0; + if (isSigned) + OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; + else + OtherOpcode = isDIV ? 
ISD::UREM : ISD::UDIV; + + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node) + continue; + if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) && + User->getOperand(0) == Op0 && + User->getOperand(1) == Op1) + return true; + } + return false; +} + +/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem +/// pairs. +void +SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + unsigned Opcode = Node->getOpcode(); + bool isSigned = Opcode == ISD::SDIVREM; + + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + + // Also pass the return address of the remainder. + SDValue FIPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = FIPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + DebugLoc dl = Node->getDebugLoc(); + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + // Remainder is loaded back from the stack frame. + SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, + MachinePointerInfo(), false, false, false, 0); + Results.push_back(CallInfo.first); + Results.push_back(Rem); +} + +/// isSinCosLibcallAvailable - Return true if sincos libcall is available. 
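+/// (On targets that provide it -- e.g. GNU environments -- the RTLIB::SINCOS_*
+/// entries typically name sincosf/sincos/sincosl; otherwise getLibcallName
+/// returns null and callers fall back to separate sin and cos libcalls.)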
+static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + return TLI.getLibcallName(LC) != 0; +} + +/// canCombineSinCosLibcall - Return true if sincos libcall is available and +/// can be used to combine sin and cos. +static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, + const TargetMachine &TM) { + if (!isSinCosLibcallAvailable(Node, TLI)) + return false; + // GNU sin/cos functions set errno while sincos does not. Therefore + // combining sin and cos is only safe if unsafe-fpmath is enabled. + bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU; + if (isGNU && !TM.Options.UnsafeFPMath) + return false; + return true; +} + +/// useSinCos - Only issue sincos libcall if both sin and cos are +/// needed. +static bool useSinCos(SDNode *Node) { + unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN + ? ISD::FCOS : ISD::FSIN; + + SDValue Op0 = Node->getOperand(0); + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node) + continue; + // The other user might have been turned into sincos already. + if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS) + return true; + } + return false; +} + +/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos +/// pairs. +void +SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + // Pass the argument. + Entry.Node = Node->getOperand(0); + Entry.Ty = RetTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Pass the return address of sin. + SDValue SinPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = SinPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Also pass the return address of the cos. 
+  SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
+  Entry.Node = CosPtr;
+  Entry.Ty = RetTy->getPointerTo();
+  Entry.isSExt = false;
+  Entry.isZExt = false;
+  Args.push_back(Entry);
+
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  DebugLoc dl = Node->getDebugLoc();
+  TargetLowering::
+    CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()),
+                         false, false, false, false,
+                         0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+                         /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+                         Callee, Args, DAG, dl);
+  std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+  Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr,
+                                MachinePointerInfo(), false, false, false, 0));
+  Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr,
+                                MachinePointerInfo(), false, false, false, 0));
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing an
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it.  At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+                                                   SDValue Op0,
+                                                   EVT DestVT,
+                                                   DebugLoc dl) {
+  if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+    // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+    // Get the stack frame index of an 8-byte buffer.
+    SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+    // word offset constant for Hi/Lo address computation
+    SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+    // set up Hi and Lo (into buffer) address based on endian
+    SDValue Hi = StackSlot;
+    SDValue Lo = DAG.getNode(ISD::ADD, dl,
+                             TLI.getPointerTy(), StackSlot, WordOff);
+    if (TLI.isLittleEndian())
+      std::swap(Hi, Lo);
+
+    // if signed map to unsigned space
+    SDValue Op0Mapped;
+    if (isSigned) {
+      // constant used to invert sign bit (signed to unsigned mapping)
+      SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+      Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+    } else {
+      Op0Mapped = Op0;
+    }
+    // store the lo of the constructed double - based on integer input
+    SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+                                  Op0Mapped, Lo, MachinePointerInfo(),
+                                  false, false, 0);
+    // initial hi portion of constructed double
+    SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+    // store the hi of the constructed double - biased exponent
+    SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+                                  MachinePointerInfo(),
+                                  false, false, 0);
+    // load the constructed double
+    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+                               MachinePointerInfo(), false, false, false, 0);
+    // FP constant to bias correct the final result
+    SDValue Bias = DAG.getConstantFP(isSigned ?
+                                     BitsToDouble(0x4330000080000000ULL) :
+                                     BitsToDouble(0x4330000000000000ULL),
+                                     MVT::f64);
+    // subtract the bias
+    SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+    // final result
+    SDValue Result;
+    // handle final rounding
+    if (DestVT == MVT::f64) {
+      // do nothing
+      Result = Sub;
+    } else if (DestVT.bitsLT(MVT::f64)) {
+      Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+                           DAG.getIntPtrConstant(0));
+    } else if (DestVT.bitsGT(MVT::f64)) {
+      Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+    }
+    return Result;
+  }
+  assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+  // Code below here assumes !isSigned without checking again.
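+  // (Worked example of the i32 trick above, for the unsigned case: storing
+  // X = 1 under the hi word 0x43300000 yields the double with bit pattern
+  // 0x4330000000000001, i.e. 2^52 + 1; subtracting the bias 2^52 leaves
+  // exactly 1.0. The signed case XORs the input with 0x80000000, biasing it
+  // by 2^31, which the larger bias 0x4330000080000000 = 2^52 + 2^31
+  // subtracts back out.)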
+
+  // Implementation of unsigned i64 to f64 following the algorithm in
+  // __floatundidf in compiler_rt. This implementation has the advantage
+  // of performing rounding correctly, both in the default rounding mode
+  // and in all alternate rounding modes.
+  // TODO: Generalize this for use with other types.
+  if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+    SDValue TwoP52 =
+      DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64);
+    SDValue TwoP84PlusTwoP52 =
+      DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64);
+    SDValue TwoP84 =
+      DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64);
+
+    SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
+    SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
+                             DAG.getConstant(32, MVT::i64));
+    SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
+    SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
+    SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+    SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
+    SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
+                                TwoP84PlusTwoP52);
+    return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+  }
+
+  // Implementation of unsigned i64 to f32.
+  // TODO: Generalize this for use with other types.
+  if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+    // For unsigned conversions, convert them to signed conversions using the
+    // algorithm from the x86_64 __floatundisf in compiler_rt.
+    if (!isSigned) {
+      SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+      SDValue ShiftConst =
+          DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
+      SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+      SDValue AndConst = DAG.getConstant(1, MVT::i64);
+      SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+      SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+      SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+      SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+      // TODO: This really should be implemented using a branch rather than a
+      // select.  We happen to get lucky and machinesink does the right
+      // thing most of the time.  This would be a good candidate for a
+      // pseudo-op, or, even better, for whole-function isel.
+      SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+        Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+      return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+    }
+
+    // Otherwise, implement the fully general conversion.
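+    // (Sketch of the general path below: values under 2^53 convert exactly
+    // through f64, so they are split into 32-bit halves and recombined as
+    // hi * 2^32 + lo. Larger values are first rounded to odd in their low
+    // 11 bits -- clear them, then set bit 11 if any were nonzero -- so the
+    // final f64->f32 rounding cannot double-round.)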
+ + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, + DAG.getConstant(UINT64_C(0x800), MVT::i64)); + SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); + SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); + SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); + SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), + ISD::SETUGE); + SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); + EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); + + SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, + DAG.getConstant(32, SHVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); + SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); + SDValue TwoP32 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64); + SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); + SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); + SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); + return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, + DAG.getIntPtrConstant(0)); + } + + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + + SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, Op0.getValueType()), + ISD::SETLT); + SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); + SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), + SignSet, Four, Zero); + + // If the sign bit of the integer is set, the large number will be treated + // as a negative number. To counteract this, the dynamic code adds an + // offset depending on the data type. + uint64_t FF; + switch (Op0.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported integer type!"); + case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) + case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) + case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) + case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) + } + if (TLI.isLittleEndian()) FF <<= 32; + Constant *FudgeFactor = ConstantInt::get( + Type::getInt64Ty(*DAG.getContext()), FF); + + SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset); + Alignment = std::min(Alignment, 4u); + SDValue FudgeInReg; + if (DestVT == MVT::f32) + FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment); + else { + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); + HandleSDNode Handle(Load); + LegalizeOp(Load.getNode()); + FudgeInReg = Handle.getValue(); + } + + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); +} + +/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a +/// *INT_TO_FP operation of the specified operand when the target requests that +/// we promote it. 
At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP +/// operation that takes a larger input. +SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, + EVT DestVT, + bool isSigned, + DebugLoc dl) { + // First step, figure out the appropriate *INT_TO_FP operation to use. + EVT NewInTy = LegalOp.getValueType(); + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (1) { + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); + assert(NewInTy.isInteger() && "Ran out of possibilities!"); + + // If the target supports SINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { + OpToUse = ISD::SINT_TO_FP; + break; + } + if (isSigned) continue; + + // If the target supports UINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { + OpToUse = ISD::UINT_TO_FP; + break; + } + + // Otherwise, try a larger type. + } + + // Okay, we found the operation and type to use. Zero extend our input to the + // desired type then run the operation on it. + return DAG.getNode(OpToUse, dl, DestVT, + DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp)); +} + +/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a +/// FP_TO_*INT operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT +/// operation that returns a larger result. +SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, + EVT DestVT, + bool isSigned, + DebugLoc dl) { + // First step, figure out the appropriate FP_TO*INT operation to use. + EVT NewOutTy = DestVT; + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (1) { + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); + assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { + OpToUse = ISD::FP_TO_SINT; + break; + } + + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { + OpToUse = ISD::FP_TO_UINT; + break; + } + + // Otherwise, try a larger type. + } + + + // Okay, we found the operation and type to use. + SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); + + // Truncate the result of the extended FP_TO_*INT operation to the desired + // size. + return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); +} + +/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. 
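+/// (For example, the i32 expansion below turns 0x12345678 into 0x78563412
+/// using four shifts, two masks, and three ORs.)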
+/// +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT); + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); + case MVT::i16: + Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + case MVT::i32: + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT)); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + case MVT::i64: + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT)); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); + Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); + } +} + +/// ExpandBitCount - Expand the specified bitcount instruction into operations. +/// +SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, + DebugLoc dl) { + switch (Opc) { + default: llvm_unreachable("Cannot expand this yet!"); + case ISD::CTPOP: { + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(VT); + unsigned Len = VT.getSizeInBits(); + + assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && + "CTPOP not implemented for this type."); + + // This is the "best" algorithm from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + + SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT); + SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT); + SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT); + SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT); + + // v = v - ((v >> 1) & 0x55555555...) 
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(1, ShVT)), + Mask55)); + // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) + Op = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::AND, dl, VT, Op, Mask33), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(2, ShVT)), + Mask33)); + // v = (v + (v >> 4)) & 0x0F0F0F0F... + Op = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(4, ShVT))), + Mask0F); + // v = (v * 0x01010101...) >> (Len - 8) + Op = DAG.getNode(ISD::SRL, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, ShVT)); + + return Op; + } + case ISD::CTLZ_ZERO_UNDEF: + // This trivially expands to CTLZ. + return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); + case ISD::CTLZ: { + // for now, we do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + // + // but see also: http://www.hackersdelight.org/HDcode/nlz.cc + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(VT); + unsigned len = VT.getSizeInBits(); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + Op = DAG.getNode(ISD::OR, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); + } + Op = DAG.getNOT(dl, Op, VT); + return DAG.getNode(ISD::CTPOP, dl, VT, Op); + } + case ISD::CTTZ_ZERO_UNDEF: + // This trivially expands to CTTZ. + return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); + case ISD::CTTZ: { + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // see also http://www.hackersdelight.org/HDcode/ntz.cc + EVT VT = Op.getValueType(); + SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNOT(dl, Op, VT), + DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getConstant(1, VT))); + // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. 
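+    // (Illustrative check of the popcount identity above: for x = 0b01011000,
+    // ~x & (x - 1) = 0b00000111, and popcount(0b00000111) = 3 = cttz(x).)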
+ if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) + return DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(VT.getSizeInBits(), VT), + DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); + return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); + } + } +} + +std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC; + + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic intrinsic Expand!"); + case ISD::ATOMIC_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; + case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; + case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; + case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + } + break; + case ISD::ATOMIC_CMP_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; + case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; + case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; + case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + } + break; + case ISD::ATOMIC_LOAD_ADD: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + } + break; + case ISD::ATOMIC_LOAD_SUB: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + } + break; + case ISD::ATOMIC_LOAD_AND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + } + break; + case ISD::ATOMIC_LOAD_OR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + } + break; + case ISD::ATOMIC_LOAD_XOR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + } + break; + case ISD::ATOMIC_LOAD_NAND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + } + break; + } + + return 
ExpandChainLibCall(LC, Node, false); +} + +void SelectionDAGLegalize::ExpandNode(SDNode *Node) { + SmallVector<SDValue, 8> Results; + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + switch (Node->getOpcode()) { + case ISD::CTPOP: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); + Results.push_back(Tmp1); + break; + case ISD::BSWAP: + Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); + break; + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FRAME_TO_ARGS_OFFSET: + Results.push_back(DAG.getConstant(0, Node->getValueType(0))); + break; + case ISD::FLT_ROUNDS_: + Results.push_back(DAG.getConstant(1, Node->getValueType(0))); + break; + case ISD::EH_RETURN: + case ISD::EH_LABEL: + case ISD::PREFETCH: + case ISD::VAEND: + case ISD::EH_SJLJ_LONGJMP: + // If the target didn't expand these, there's nothing to do, so just + // preserve the chain and be done. + Results.push_back(Node->getOperand(0)); + break; + case ISD::EH_SJLJ_SETJMP: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. + Results.push_back(DAG.getConstant(0, MVT::i32)); + Results.push_back(Node->getOperand(0)); + break; + case ISD::ATOMIC_FENCE: + case ISD::MEMBARRIER: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + // FIXME: handle "fence singlethread" more efficiently. + TargetLowering::ArgListTy Args; + TargetLowering:: + CallLoweringInfo CLI(Node->getOperand(0), + Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), + Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + case ISD::ATOMIC_LOAD: { + // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. + SDValue Zero = DAG.getConstant(0, Node->getValueType(0)); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, + cast<AtomicSDNode>(Node)->getMemoryVT(), + Node->getOperand(0), + Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + Results.push_back(Swap.getValue(0)); + Results.push_back(Swap.getValue(1)); + break; + } + case ISD::ATOMIC_STORE: { + // There is no libcall for atomic store; fake it with ATOMIC_SWAP. + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + cast<AtomicSDNode>(Node)->getMemoryVT(), + Node->getOperand(0), + Node->getOperand(1), Node->getOperand(2), + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + Results.push_back(Swap.getValue(1)); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. 
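+  // (For instance, an Expand'd i32 ISD::ATOMIC_LOAD_ADD is lowered by
+  // ExpandAtomic above into a call to __sync_fetch_and_add_4.)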
+ case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } + case ISD::DYNAMIC_STACKALLOC: + ExpandDYNAMIC_STACKALLOC(Node, Results); + break; + case ISD::MERGE_VALUES: + for (unsigned i = 0; i < Node->getNumValues(); i++) + Results.push_back(Node->getOperand(i)); + break; + case ISD::UNDEF: { + EVT VT = Node->getValueType(0); + if (VT.isInteger()) + Results.push_back(DAG.getConstant(0, VT)); + else { + assert(VT.isFloatingPoint() && "Unknown value type!"); + Results.push_back(DAG.getConstantFP(0, VT)); + } + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + TargetLowering:: + CallLoweringInfo CLI(Node->getOperand(0), + Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("abort", TLI.getPointerTy()), + Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + case ISD::FP_ROUND: + case ISD::BITCAST: + Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_EXTEND: + Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getOperand(0).getValueType(), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::SIGN_EXTEND_INREG: { + // NOTE: we could fall back on load/store here too for targets without + // SAR. However, it is doubtful that any exist. + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT VT = Node->getValueType(0); + EVT ShiftAmountTy = TLI.getShiftAmountTy(VT); + if (VT.isVector()) + ShiftAmountTy = VT; + unsigned BitsDiff = VT.getScalarType().getSizeInBits() - + ExtraVT.getScalarType().getSizeInBits(); + SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy); + Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), + Node->getOperand(0), ShiftCst); + Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); + Results.push_back(Tmp1); + break; + } + case ISD::FP_ROUND_INREG: { + // The only way we can lower this is to turn it into a TRUNCSTORE, + // EXTLOAD pair, targeting a temporary location (a stack slot). + + // NOTE: there is a choice here between constantly creating new stack + // slots and always reusing the same one. We currently always create + // new ones, as reuse may inhibit scheduling. 
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, + Node->getOperand(0), Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_TO_UINT: { + SDValue True, False; + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + APFloat apf(DAG.EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); + APInt x = APInt::getSignBit(NVT.getSizeInBits()); + (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); + Tmp1 = DAG.getConstantFP(apf, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + Node->getOperand(0), + Tmp1, ISD::SETLT); + True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, + DAG.getNode(ISD::FSUB, dl, VT, + Node->getOperand(0), Tmp1)); + False = DAG.getNode(ISD::XOR, dl, NVT, False, + DAG.getConstant(x, NVT)); + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False); + Results.push_back(Tmp1); + break; + } + case ISD::VAARG: { + const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + EVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, + MachinePointerInfo(V), + false, false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(Align - 1, + TLI.getPointerTy())); + + VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(-(int64_t)Align, + TLI.getPointerTy())); + } + + // Increment the pointer, VAList, to the next vaarg + Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(TLI.getDataLayout()-> + getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), + TLI.getPointerTy())); + // Store the incremented VAList to the legalized pointer + Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, + MachinePointerInfo(V), false, false, 0); + // Load the actual argument out of the pointer VAList + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), + false, false, false, 0)); + Results.push_back(Results[0].getValue(1)); + break; + } + case ISD::VACOPY: { + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. + const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); + const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); + Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), + Node->getOperand(2), MachinePointerInfo(VS), + false, false, false, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_VECTOR_ELT: + if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) + // This must be an access of the only element. Return it. 
+      Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
+                         Node->getOperand(0));
+    else
+      Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+    Results.push_back(Tmp1);
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+    break;
+  case ISD::INSERT_SUBVECTOR:
+    Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+    break;
+  case ISD::CONCAT_VECTORS: {
+    Results.push_back(ExpandVectorBuildThroughStack(Node));
+    break;
+  }
+  case ISD::SCALAR_TO_VECTOR:
+    Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+                                              Node->getOperand(1),
+                                              Node->getOperand(2), dl));
+    break;
+  case ISD::VECTOR_SHUFFLE: {
+    SmallVector<int, 32> NewMask;
+    ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+    EVT VT = Node->getValueType(0);
+    EVT EltVT = VT.getVectorElementType();
+    SDValue Op0 = Node->getOperand(0);
+    SDValue Op1 = Node->getOperand(1);
+    if (!TLI.isTypeLegal(EltVT)) {
+
+      EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+      // BUILD_VECTOR operands are allowed to be wider than the element type.
+      // But if NewEltVT is smaller than EltVT, the BUILD_VECTOR does not
+      // accept it.
+      if (NewEltVT.bitsLT(EltVT)) {
+
+        // Convert shuffle node.
+        // If the original node was v4i64 and the new EltVT is i32,
+        // cast operands to v8i32 and re-build the mask.
+
+        // Calculate the new VT; its size should equal the original VT's.
+        EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+                                      VT.getSizeInBits()/NewEltVT.getSizeInBits());
+        assert(NewVT.bitsEq(VT));
+
+        // cast operands to new VT
+        Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+        Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+        // Convert the shuffle mask
+        unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements();
+
+        // EltVT gets smaller
+        assert(factor > 0);
+
+        for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+          if (Mask[i] < 0) {
+            for (unsigned fi = 0; fi < factor; ++fi)
+              NewMask.push_back(Mask[i]);
+          }
+          else {
+            for (unsigned fi = 0; fi < factor; ++fi)
+              NewMask.push_back(Mask[i]*factor+fi);
+          }
+        }
+        Mask = NewMask;
+        VT = NewVT;
+      }
+      EltVT = NewEltVT;
+    }
+    unsigned NumElems = VT.getVectorNumElements();
+    SmallVector<SDValue, 16> Ops;
+    for (unsigned i = 0; i != NumElems; ++i) {
+      if (Mask[i] < 0) {
+        Ops.push_back(DAG.getUNDEF(EltVT));
+        continue;
+      }
+      unsigned Idx = Mask[i];
+      if (Idx < NumElems)
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+                                  Op0,
+                                  DAG.getIntPtrConstant(Idx)));
+      else
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+                                  Op1,
+                                  DAG.getIntPtrConstant(Idx - NumElems)));
+    }
+
+    Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+    // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
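+    // (Illustrative mask conversion from above: a v4i64 shuffle with mask
+    // <1,0,3,2> whose i64 elements were split into i32 pairs was rebuilt as
+    // a v8i32 operation with mask <2,3,0,1,6,7,4,5> before being scalarized
+    // into the BUILD_VECTOR.)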
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_ELEMENT: { + EVT OpTy = Node->getOperand(0).getValueType(); + if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + // 1 -> Hi + Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), + DAG.getConstant(OpTy.getSizeInBits()/2, + TLI.getShiftAmountTy(Node->getOperand(0).getValueType()))); + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); + } else { + // 0 -> Lo + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), + Node->getOperand(0)); + } + Results.push_back(Tmp1); + break; + } + case ISD::STACKSAVE: + // Expand to CopyFromReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, + Node->getValueType(0))); + Results.push_back(Results[0].getValue(1)); + } else { + Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::STACKRESTORE: + // Expand to CopyToReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, + Node->getOperand(1))); + } else { + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::FCOPYSIGN: + Results.push_back(ExpandFCOPYSIGN(Node)); + break; + case ISD::FNEG: + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0)); + Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, + Node->getOperand(0)); + Results.push_back(Tmp1); + break; + case ISD::FABS: { + // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). + EVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = DAG.getConstantFP(0.0, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, ISD::SETUGT); + Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); + Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + case ISD::FCOS: { + EVT VT = Node->getValueType(0); + bool isSIN = Node->getOpcode() == ISD::FSIN; + // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / + // fcos which share the same operand and both are used. + if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || + canCombineSinCosLibcall(Node, TLI, TM)) + && useSinCos(Node)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); + if (!isSIN) + Tmp1 = Tmp1.getValue(1); + Results.push_back(Tmp1); + } else if (isSIN) { + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); + } else { + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); + } + break; + } + case ISD::FSINCOS: + // Expand into sincos libcall. 
+ ExpandSinCosLibCall(Node, Results); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); + break; + case ISD::FDIV: + Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); + break; + case ISD::FREM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128)); + break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128)); + break; + case ISD::FP16_TO_FP32: + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + break; + case ISD::FP32_TO_FP16: + Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false)); + break; + case ISD::ConstantFP: { + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); + // Check to see if this FP immediate is already legal. + // If this is a legal constant, turn it into a TargetConstantFP node. 
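+    // Otherwise ExpandConstantFP places the value in the constant pool
+    // (narrowing it to a smaller FP type first when that is legal and
+    // profitable) and reloads it, using an extending load if narrowed.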
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(ExpandConstantFP(CFP, true)); + break; + } + case ISD::EHSELECTION: { + unsigned Reg = TLI.getExceptionSelectorRegister(); + assert(Reg && "Can't expand to unknown register!"); + Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg, + Node->getValueType(0))); + Results.push_back(Results[0].getValue(1)); + break; + } + case ISD::EXCEPTIONADDR: { + unsigned Reg = TLI.getExceptionPointerRegister(); + assert(Reg && "Can't expand to unknown register!"); + Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, + Node->getValueType(0))); + Results.push_back(Results[0].getValue(1)); + break; + } + case ISD::FSUB: { + EVT VT = Node->getValueType(0); + assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && + "Don't know how to expand this FP subtraction!"); + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::SUB: { + EVT VT = Node->getValueType(0); + assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && + TLI.isOperationLegalOrCustom(ISD::XOR, VT) && + "Don't know how to expand this subtraction!"); + Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); + Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT)); + Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); + break; + } + case ISD::UREM: + case ISD::SREM: { + EVT VT = Node->getValueType(0); + bool isSigned = Node->getOpcode() == ISD::SREM; + unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + Tmp2 = Node->getOperand(0); + Tmp3 = Node->getOperand(1); + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + // If div is legal, it's better to do the normal expansion + !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && + useDivRem(Node, isSigned, false))) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); + } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { + // X % Y -> X-X/Y*Y + Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); + Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); + } else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128); + Results.push_back(Tmp1); + break; + } + case ISD::UDIV: + case ISD::SDIV: { + bool isSigned = Node->getOpcode() == ISD::SDIV; + unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + useDivRem(Node, isSigned, true))) + Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128); + Results.push_back(Tmp1); + break; + } + case ISD::MULHU: + case ISD::MULHS: { + unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : + ISD::SMUL_LOHI; + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && + "If this wasn't legal, it shouldn't have been created!"); + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1.getValue(1)); + break; + } + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; + case ISD::MUL: { + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + // See if multiply or divide can be lowered using two-result operations. + // We just need the low half of the multiply; try both the signed + // and unsigned forms. If the target supports both SMUL_LOHI and + // UMUL_LOHI, form a preference by checking which forms of plain + // MULH it supports. + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT); + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT); + unsigned OpToUse = 0; + if (HasSMUL_LOHI && !HasMULHS) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI && !HasMULHU) { + OpToUse = ISD::UMUL_LOHI; + } else if (HasSMUL_LOHI) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI) { + OpToUse = ISD::UMUL_LOHI; + } + if (OpToUse) { + Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0), + Node->getOperand(1))); + break; + } + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128); + Results.push_back(Tmp1); + break; + } + case ISD::SADDO: + case ISD::SSUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + EVT OType = Node->getValueType(1); + + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? 
+ ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Results.push_back(Cmp); + break; + } + case ISD::UADDO: + case ISD::USUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, + Node->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT)); + break; + } + case ISD::UMULO: + case ISD::SMULO: { + EVT VT = Node->getValueType(0); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue BottomHalf; + SDValue TopHalf; + static const unsigned Ops[2][3] = + { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND }, + { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }}; + bool isSigned = Node->getOpcode() == ISD::SMULO; + if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) { + BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS); + } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) { + BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, + RHS); + TopHalf = BottomHalf.getValue(1); + } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() * 2))) { + LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); + RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, + DAG.getIntPtrConstant(1)); + } else { + // We can fall back to a libcall with an illegal type for the MUL if we + // have a libcall big enough. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (WideVT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (WideVT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (WideVT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (WideVT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); + + // The high part is obtained by SRA'ing all but one of the bits of low + // part. + unsigned LoSize = VT.getSizeInBits(); + SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + + // Here we're passing the 2 arguments explicitly as 4 arguments that are + // pre-lowered to the correct types. This all depends upon WideVT not + // being a legal type for the architecture and thus has to be split to + // two arguments. + SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(1)); + // Ret is a node with an illegal type. Because such things are not + // generally permitted during this phase of legalization, delete the + // node. 
The above EXTRACT_ELEMENT nodes should have been folded. + DAG.DeleteNode(Ret.getNode()); + } + + if (isSigned) { + Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, + TLI.getShiftAmountTy(BottomHalf.getValueType())); + Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); + TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1, + ISD::SETNE); + } else { + TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, + DAG.getConstant(0, VT), ISD::SETNE); + } + Results.push_back(BottomHalf); + Results.push_back(TopHalf); + break; + } + case ISD::BUILD_PAIR: { + EVT PairTy = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); + Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, + DAG.getConstant(PairTy.getSizeInBits()/2, + TLI.getShiftAmountTy(PairTy))); + Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); + break; + } + case ISD::SELECT: + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + if (Tmp1.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1), + Tmp2, Tmp3, + cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); + } else { + Tmp1 = DAG.getSelectCC(dl, Tmp1, + DAG.getConstant(0, Tmp1.getValueType()), + Tmp2, Tmp3, ISD::SETNE); + } + Results.push_back(Tmp1); + break; + case ISD::BR_JT: { + SDValue Chain = Node->getOperand(0); + SDValue Table = Node->getOperand(1); + SDValue Index = Node->getOperand(2); + + EVT PTy = TLI.getPointerTy(); + + const DataLayout &TD = *TLI.getDataLayout(); + unsigned EntrySize = + DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); + + Index = DAG.getNode(ISD::MUL, dl, PTy, + Index, DAG.getConstant(EntrySize, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(), MemVT, + false, false, 0); + Addr = LD; + if (TM.getRelocationModel() == Reloc::PIC_) { + // For PIC, the sequence is: + // BRIND(load(Jumptable + index) + RelocBase) + // RelocBase can be JumpTable, GOT or some sort of global base. + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, + TLI.getPICJumpTableRelocBase(Table, DAG)); + } + Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); + Results.push_back(Tmp1); + break; + } + case ISD::BRCOND: + // Expand brcond's setcc into its constituent parts and create a BR_CC + // Node. + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + if (Tmp2.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, + Tmp1, Tmp2.getOperand(2), + Tmp2.getOperand(0), Tmp2.getOperand(1), + Node->getOperand(2)); + } else { + // We test only the i1 bit. Skip the AND if UNDEF. + Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? 
Tmp2 : + DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, + DAG.getConstant(1, Tmp2.getValueType())); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, + DAG.getCondCode(ISD::SETNE), Tmp3, + DAG.getConstant(0, Tmp3.getValueType()), + Node->getOperand(2)); + } + Results.push_back(Tmp1); + break; + case ISD::SETCC: { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl); + + // If we expanded the SETCC into an AND/OR, return the new node + if (Tmp2.getNode() == 0) { + Results.push_back(Tmp1); + break; + } + + // Otherwise, SETCC for the given comparison type must be completely + // illegal; expand it into a SELECT_CC. + EVT VT = Node->getValueType(0); + int TrueValue; + switch (TLI.getBooleanContents(VT.isVector())) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + TrueValue = 1; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + TrueValue = -1; + break; + } + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, + DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT), + Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::SELECT_CC: { + Tmp1 = Node->getOperand(0); // LHS + Tmp2 = Node->getOperand(1); // RHS + Tmp3 = Node->getOperand(2); // True + Tmp4 = Node->getOperand(3); // False + SDValue CC = Node->getOperand(4); + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, CC, dl); + + assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!"); + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, + Tmp3, Tmp4, CC); + Results.push_back(Tmp1); + break; + } + case ISD::BR_CC: { + Tmp1 = Node->getOperand(0); // Chain + Tmp2 = Node->getOperand(2); // LHS + Tmp3 = Node->getOperand(3); // RHS + Tmp4 = Node->getOperand(1); // CC + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), + Tmp2, Tmp3, Tmp4, dl); + + assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); + Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, + Tmp3, Node->getOperand(4)); + Results.push_back(Tmp1); + break; + } + case ISD::BUILD_VECTOR: + Results.push_back(ExpandBUILD_VECTOR(Node)); + break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. 
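+    // e.g. a <4 x i32> shl becomes four scalar shifts,
+    //   t[i] = shl (extractelement %v, i), (extractelement %amt, i)
+    // reassembled with a single BUILD_VECTOR.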
+ EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector<SDValue, 8> Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + SDValue Result = + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: + case ISD::GlobalAddress: + case ISD::GlobalTLSAddress: + case ISD::ExternalSymbol: + case ISD::ConstantPool: + case ISD::JumpTable: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + // FIXME: Custom lowering for these operations shouldn't return null! + break; + } + + // Replace the original node with the legalized result. + if (!Results.empty()) + ReplaceNode(Node, Results.data()); +} + +void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + SmallVector<SDValue, 8> Results; + MVT OVT = Node->getSimpleValueType(0); + if (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP || + Node->getOpcode() == ISD::SETCC) { + OVT = Node->getOperand(0).getSimpleValueType(); + } + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3; + switch (Node->getOpcode()) { + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTPOP: + // Zero extend the argument. + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is + // already the correct result. + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + if (Node->getOpcode() == ISD::CTTZ) { + // FIXME: This should set a bit in the zero extended value instead. 
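+      // The promoted CTTZ scans the zero-extended value, so a zero source
+      // yields NVT's bit width (e.g. 32 after promoting i8 to i32); the
+      // select below maps that back to OVT's bit width (8).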
+ Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), + Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), + ISD::SETEQ); + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + } else if (Node->getOpcode() == ISD::CTLZ || + Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { + // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) + Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), NVT)); + } + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); + break; + case ISD::BSWAP: { + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); + Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); + Results.push_back(Tmp1); + break; + } + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::FP_TO_SINT, dl); + Results.push_back(Tmp1); + break; + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::SINT_TO_FP, dl); + Results.push_back(Tmp1); + break; + case ISD::VAARG: { + SDValue Chain = Node->getOperand(0); // Get the chain. + SDValue Ptr = Node->getOperand(1); // Get the pointer. + + unsigned TruncOp; + if (OVT.isVector()) { + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() + && "VAARG promotion is supported only for vectors or integer types"); + TruncOp = ISD::TRUNCATE; + } + + // Perform the larger operation, then convert back + Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2), + Node->getConstantOperandVal(3)); + Chain = Tmp1.getValue(1); + + Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1); + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + ReplacedNode(Node); + break; + } + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + unsigned ExtOp, TruncOp; + if (OVT.isVector()) { + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() && "Cannot promote logic operation"); + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } + // Promote each of the values to the new type. + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + // Perform the larger operation, then convert back + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); + break; + } + case ISD::SELECT: { + unsigned ExtOp, TruncOp; + if (Node->getValueType(0).isVector()) { + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; + } else if (Node->getValueType(0).isInteger()) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + ExtOp = ISD::FP_EXTEND; + TruncOp = ISD::FP_ROUND; + } + Tmp1 = Node->getOperand(0); + // Promote each of the values to the new type. + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + // Perform the larger operation, then round down. 
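+      // FP_ROUND takes a second, integer operand; the 0 passed below means
+      // the narrowing may change the value (a 1 would assert it is exact).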
+ Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3); + if (TruncOp != ISD::FP_ROUND) + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); + else + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, + DAG.getIntPtrConstant(0)); + Results.push_back(Tmp1); + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); + + // Cast the two input vectors. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1)); + + // Convert the shuffle mask to the right # elements. + Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::SETCC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(2))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Node->getOperand(2))); + break; + } + case ISD::FDIV: + case ISD::FREM: + case ISD::FPOW: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0))); + break; + } + case ISD::FLOG2: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FEXP: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp2, DAG.getIntPtrConstant(0))); + break; + } + } + + // Replace the original node with the legalized result. + if (!Results.empty()) + ReplaceNode(Node, Results.data()); +} + +// SelectionDAG::Legalize - This is the entry point for the file. +// +void SelectionDAG::Legalize() { + /// run - This is the main entry point to this class. + /// + SelectionDAGLegalize(*this).LegalizeDAG(); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp new file mode 100644 index 0000000..de217d8 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -0,0 +1,1451 @@ +//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements float type expansion and softening for LegalizeTypes. +// Softening is the act of turning a computation in an illegal floating point +// type into a computation in an integer type of the same size; also known as +// "soft float". For example, turning f32 arithmetic into operations using i32. +// The resulting integer value is the same as what you would get by performing +// the floating point operation and bitcasting the result to the integer type. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. 
For example, +// implementing ppcf128 arithmetic in two f64 registers. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// GetFPLibCall - Return the right libcall for the given floating point type. +static RTLIB::Libcall GetFPLibCall(EVT VT, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + return + VT == MVT::f32 ? Call_F32 : + VT == MVT::f64 ? Call_F64 : + VT == MVT::f80 ? Call_F80 : + VT == MVT::f128 ? Call_F128 : + VT == MVT::ppcf128 ? Call_PPCF128 : + RTLIB::UNKNOWN_LIBCALL; +} + +//===----------------------------------------------------------------------===// +// Result Float to Integer Conversion. +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soften the result of this operator!"); + + case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; + case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; + case ISD::ConstantFP: + R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); + break; + case ISD::EXTRACT_VECTOR_ELT: + R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMA: R = SoftenFloatRes_FMA(N); break; + case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; + case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break; + case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = 
SoftenFloatRes_UNDEF(N); break; + case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; + } + + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) + SetSoftenedFloat(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { + return BitConvertToInteger(N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return BitConvertToInteger(Op); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { + // Convert the inputs to integers, and build a new pair out of them. + return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), + BitConvertToInteger(N->getOperand(0)), + BitConvertToInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { + return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + NewOp.getValueType().getVectorElementType(), + NewOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Size = NVT.getSizeInBits(); + + // Mask = ~(1 << (Size-1)) + APInt API = APInt::getAllOnesValue(Size); + API.clearBit(Size-1); + SDValue Mask = DAG.getConstant(API, NVT); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(0)); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + + EVT LVT = LHS.getValueType(); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // First get the sign bit of second operand. + SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), + DAG.getConstant(RSize - 1, + TLI.getShiftAmountTy(RVT))); + SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); + + // Shift right or sign-extend it if the two operands have different types. 
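+  // e.g. copysign(f32, f64): RVT is i64, LVT is i32, and SizeDiff is 32,
+  // so the sign bit moves from position 63 down to position 31 via the
+  // SRL and TRUNCATE below.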
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); + if (SizeDiff > 0) { + SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, + TLI.getShiftAmountTy(SignBit.getValueType()))); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); + } else if (SizeDiff < 0) { + SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); + SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, + TLI.getShiftAmountTy(SignBit.getValueType()))); + } + + // Clear the sign bit of the first operand. + SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), + DAG.getConstant(LSize - 1, + TLI.getShiftAmountTy(LVT))); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); + LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); + + // Or the value with the sign bit. + return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_F128, + RTLIB::COS_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, 
GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + NVT, Ops, 3, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), + GetSoftenedFloat(N->getOperand(0)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special +// nodes? 
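+// The f16 input already lives in an i16, so softening just emits the
+// extension libcall (typically __gnu_h2f_ieee) on that integer value.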
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, + N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_F128, + RTLIB::POW_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::i32 && + "Unsupported power type!"); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_F128, + RTLIB::REM_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + 
RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewL; + if (L->getExtensionType() == ISD::NON_EXTLOAD) { + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), + L->getPointerInfo(), NVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; + } + + // Do a non-extending load followed by FP_EXTEND. + NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, + L->getMemoryVT(), dl, L->getChain(), + L->getBasePtr(), L->getOffset(), L->getPointerInfo(), + L->getMemoryVT(), L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(1)); + SDValue RHS = GetSoftenedFloat(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(2)); + SDValue RHS = GetSoftenedFloat(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewVAARG; + NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + return NewVAARG; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { + bool Signed = N->getOpcode() == ISD::SINT_TO_FP; + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + EVT NVT = EVT(); + DebugLoc dl = N->getDebugLoc(); + + // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to + // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly + // match. Look for an appropriate libcall. 
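+  // e.g. for i1 -> f32 the loop below settles on i32, the first integer
+  // type at least as wide as the source with a known conversion routine
+  // (typically __floatsisf in the signed case); the operand is extended
+  // to i32 before the call.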
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+       t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+    NVT = (MVT::SimpleValueType)t;
+    // The source needs to be big enough to hold the operand.
+    if (NVT.bitsGE(SVT))
+      LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+  }
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+  // Sign/zero extend the argument if the libcall takes a larger type.
+  SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                           NVT, N->getOperand(0));
+  return TLI.makeLibCall(DAG, LC,
+                         TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+                         &Op, 1, false, dl);
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Operand Float to Integer Conversion.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to soften this operator's operand!");
+
+  case ISD::BITCAST:     Res = SoftenFloatOp_BITCAST(N); break;
+  case ISD::BR_CC:       Res = SoftenFloatOp_BR_CC(N); break;
+  case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;
+  case ISD::FP_TO_SINT:  Res = SoftenFloatOp_FP_TO_SINT(N); break;
+  case ISD::FP_TO_UINT:  Res = SoftenFloatOp_FP_TO_UINT(N); break;
+  case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
+  case ISD::SELECT_CC:   Res = SoftenFloatOp_SELECT_CC(N); break;
+  case ISD::SETCC:       Res = SoftenFloatOp_SETCC(N); break;
+  case ISD::STORE:       Res = SoftenFloatOp_STORE(N, OpNo); break;
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place. Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+                     GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+  EVT SVT = N->getOperand(0).getValueType();
+  EVT RVT = N->getValueType(0);
+
+  RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+
+  EVT VT = NewLHS.getValueType();
+  NewLHS = GetSoftenedFloat(NewLHS);
+  NewRHS = GetSoftenedFloat(NewRHS);
+  TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If softenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
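+  // e.g. an f32 SETUEQ takes two libcalls (typically __unordsf2 and
+  // __eqsf2); softenSetCCOperands ORs their boolean results into NewLHS,
+  // leaves NewRHS null, and the merged scalar is then tested != 0 below.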
+ if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { + EVT RVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { + EVT RVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { + EVT RVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only soften the stored value!"); + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + DebugLoc dl = N->getDebugLoc(); + + if (ST->isTruncatingStore()) + // Do an FP_ROUND followed by a non-truncating store. 
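+    // e.g. a truncating f64 -> f32 store becomes an FP_ROUND to f32, a
+    // bitcast to i32, and a plain i32 store of the integer value.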
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), + Val, DAG.getIntPtrConstant(0))); + else + Val = GetSoftenedFloat(Val); + + return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), + ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); +} + + +//===----------------------------------------------------------------------===// +// Float Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion, we just +/// know that (at least) one result needs expansion. +void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand the result of this operator!"); + + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + + case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; + case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; + case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; + case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; + case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; + case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; + case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; + case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; + case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; + case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; + case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; + case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; + case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break; + case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; + case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; + case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; + case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, 
Hi); break; + case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetExpandedFloat(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(NVT.getSizeInBits() == integerPartWidth && + "Do not know how to expand this float constant!"); + APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(integerPartWidth, C.getRawData()[1])), + NVT); + Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(integerPartWidth, C.getRawData()[0])), + NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + DebugLoc dl = N->getDebugLoc(); + SDValue Tmp; + GetExpandedFloat(N->getOperand(0), Lo, Tmp); + Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); + // Lo = Hi==fabs(Hi) ? Lo : -Lo; + Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), + DAG.getCondCode(ISD::SETEQ)); +} + +void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COPYSIGN_F32, + RTLIB::COPYSIGN_F64, + RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_F128, + RTLIB::COPYSIGN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void 
DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, 3, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedFloat(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), + N, false); + GetPairElements(Call, Lo, 
Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + DebugLoc dl = N->getDebugLoc(); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + + // Remember the chain. + Chain = Hi.getValue(1); + + // The low part is zero. + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); + + // Modified the chain - switch anything that used the old chain to use the + // new one. + ReplaceValueWith(SDValue(LD, 1), Chain); +} + +void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; + DebugLoc dl = N->getDebugLoc(); + + // First do an SINT_TO_FP, whether the original was signed or unsigned. 
+ // When promoting partial word types to i32 we must honor the signedness, + // though. + if (SrcVT.bitsLE(MVT::i32)) { + // The integer can be represented exactly in an f64. + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i32, Src); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); + Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); + } else { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (SrcVT.bitsLE(MVT::i64)) { + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i64, Src); + LC = RTLIB::SINTTOFP_I64_PPCF128; + } else if (SrcVT.bitsLE(MVT::i128)) { + Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src); + LC = RTLIB::SINTTOFP_I128_PPCF128; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl); + GetPairElements(Hi, Lo, Hi); + } + + if (isSigned) + return; + + // Unsigned - fix up the SINT_TO_FP value just calculated. + Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi); + SrcVT = Src.getValueType(); + + // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128. + static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 }; + static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 }; + static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; + ArrayRef<uint64_t> Parts; + + switch (SrcVT.getSimpleVT().SimpleTy) { + default: + llvm_unreachable("Unsupported UINT_TO_FP!"); + case MVT::i32: + Parts = TwoE32; + break; + case MVT::i64: + Parts = TwoE64; + break; + case MVT::i128: + Parts = TwoE128; + break; + } + + Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, + DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, + APInt(128, Parts)), + MVT::ppcf128)); + Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, DAG.getCondCode(ISD::SETLT)); + GetPairElements(Lo, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Float Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatOperand - This method is called when the specified operand of the +/// specified node is found to need expansion. At this point, all of the result +/// types of the node are known to be legal, but other operands of the node may +/// need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom expand this node. 
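+  // (If it does, CustomLowerNode has already registered the replacement
+  // values, so returning false tells the legalizer core that nothing is
+  // left to do for this node.)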
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedFloat(NewLHS, LHSLo, LHSHi); + GetExpandedFloat(NewRHS, RHSLo, RHSHi); + + assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!"); + + // FIXME: This generated code sucks. We want to generate + // FCMPU crN, hi1, hi2 + // BNE crN, L: + // FCMPU crN, lo1, lo2 + // The following can be improved, but not that much. + SDValue Tmp1, Tmp2, Tmp3; + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETOEQ); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, CCCode); + Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETUNE); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode); + Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); + NewRHS = SDValue(); // LHS is the result, not a compare. +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. 
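+  // (Note that UpdateNodeOperands may CSE to an already-existing node rather
+  // than mutating N in place, which is why the returned node, not N, is
+  // wrapped into the result SDValue.)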
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)), 0); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(0), Lo, Hi); + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + N->getValueType(0), Hi, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { + EVT RVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, + N->getOperand(0), DAG.getValueType(MVT::f64)); + Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, + DAG.getIntPtrConstant(1)); + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + } + + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { + EVT RVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; + APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31)); + SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); + // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X + // FIXME: generated code sucks. + return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + DAG.getCondCode(ISD::SETGE)); + } + + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, + false, dl); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. 
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        N->getOperand(2), N->getOperand(3),
+                                        DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+  if (ISD::isNormalStore(N))
+    return ExpandOp_NormalStore(N, OpNo);
+
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+
+  SDValue Chain = ST->getChain();
+  SDValue Ptr = ST->getBasePtr();
+
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                     ST->getValue().getValueType());
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+  assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+  (void)NVT;
+
+  SDValue Lo, Hi;
+  GetExpandedOp(ST->getValue(), Lo, Hi);
+
+  return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+                           ST->getPointerInfo(),
+                           ST->getMemoryVT(), ST->isVolatile(),
+                           ST->isNonTemporal(), ST->getAlignment());
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 0000000..d19c13b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,3042 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on PowerPC).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
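+/// For example (a sketch of the common case): an illegal i8 add is rewritten
+/// as an i32 add of the promoted operands; the extra high bits may be garbage,
+/// which is fine because only the low 8 bits of the result are meaningful.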
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "PromoteIntegerResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to promote this operator!"); + case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break; + case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; + case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; + case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; + case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; + case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; + case ISD::Constant: Res = PromoteIntRes_Constant(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntRes_CONVERT_RNDSAT(N); break; + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; + case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; + case ISD::EXTRACT_VECTOR_ELT: + Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; + case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; + case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; + case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; + case ISD::SIGN_EXTEND_INREG: + Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; + case ISD::SRA: Res = PromoteIntRes_SRA(N); break; + case ISD::SRL: Res = PromoteIntRes_SRL(N); break; + case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; + case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; + case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + + case ISD::EXTRACT_SUBVECTOR: + Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; + case ISD::VECTOR_SHUFFLE: + Res = PromoteIntRes_VECTOR_SHUFFLE(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break; + case ISD::BUILD_VECTOR: + Res = PromoteIntRes_BUILD_VECTOR(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::CONCAT_VECTORS: + Res = PromoteIntRes_CONCAT_VECTORS(N); break; + + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; + + case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break; + + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + + case ISD::SDIV: + case ISD::SREM: Res = PromoteIntRes_SDIV(N); break; + + case ISD::UDIV: + case ISD::UREM: Res = PromoteIntRes_UDIV(N); break; + + case ISD::SADDO: + case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; + case ISD::UADDO: + case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break; + case ISD::SMULO: + case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + + case ISD::ATOMIC_LOAD: + Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break; + + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case 
ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: + Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; + + case ISD::ATOMIC_CMP_SWAP: + Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break; + } + + // If the result is null then the sub-method took care of registering it. + if (Res.getNode()) + SetPromotedInteger(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return GetPromotedInteger(Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { + // Sign-extend the new bits, and continue the assertion. + SDValue Op = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertSext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { + // Zero the new bits, and continue the assertion. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertZext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { + EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), ResVT, + N->getChain(), N->getBasePtr(), + N->getMemOperand(), N->getOrdering(), + N->getSynchScope()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), + N->getChain(), N->getBasePtr(), + Op2, N->getMemOperand(), N->getOrdering(), + N->getSynchScope()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), N->getChain(), N->getBasePtr(), + Op2, Op3, N->getMemOperand(), N->getOrdering(), + N->getSynchScope()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + break; + case TargetLowering::TypePromoteInteger: + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector()) + // The input promotes to the same size. Convert the promoted value. 
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); + break; + case TargetLowering::TypeSoftenFloat: + // Promote the integer operand by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + break; + case TargetLowering::TypeScalarizeVector: + // Convert the element to an integer and promote it by hand. + if (!NOutVT.isVector()) + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + break; + case TargetLowering::TypeSplitVector: { + // For example, i32 = BITCAST v2i16 on alpha. Convert the split + // pieces of the input into integers and reassemble in the final type. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + InOp = DAG.getNode(ISD::ANY_EXTEND, dl, + EVT::getIntegerVT(*DAG.getContext(), + NOutVT.getSizeInBits()), + JoinIntegers(Lo, Hi)); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); + } + case TargetLowering::TypeWidenVector: + // The input is widened to the same size. Convert to the widened value. + // Make sure that the outgoing value is not a vector, because this would + // make us bitcast between two vectors which are legalized in different ways. + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); + } + + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + CreateStackStoreLoad(InOp, OutVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, TLI.getPointerTy())); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { + // The pair element type may be legal, or may not promote to the same type as + // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), JoinIntegers(N->getOperand(0), + N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { + EVT VT = N->getValueType(0); + // FIXME there is no actual debug info here + DebugLoc dl = N->getDebugLoc(); + // Zero extend things like i1, sign extend everything else. It shouldn't + // matter in theory which one we pick, but this tends to give better code? + unsigned Opc = VT.isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue Result = DAG.getNode(Opc, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), VT), + SDValue(N, 0)); + assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); + return Result; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && + "can only promote integers"); + EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + DebugLoc dl = N->getDebugLoc(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); + // Subtract off the extra leading bits in the bigger type. + return DAG.getNode(ISD::SUB, dl, NVT, Op, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), NVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + DebugLoc dl = N->getDebugLoc(); + if (N->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.setBit(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + } + return DAG.getNode(N->getOpcode(), dl, NVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), + N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NewOpc = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); + + // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is + // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT + // and SINT conversions are Custom, there is no way to tell which is + // preferable. We choose SINT because that's the right thing on PPC.) + if (N->getOpcode() == ISD::FP_TO_UINT && + !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) + NewOpc = ISD::FP_TO_SINT; + + SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + + // Assert that the converted value fits in the original type. If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? 
+ ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, + DAG.getValueType(N->getValueType(0).getScalarType())); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); + + return DAG.getNode(ISD::AssertZext, dl, + NVT, Res, DAG.getValueType(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(N->getOperand(0).getValueType()) + == TargetLowering::TypePromoteInteger) { + SDValue Res = GetPromotedInteger(N->getOperand(0)); + assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); + + // If the result and operand types are the same after promotion, simplify + // to an in-register extension. + if (NVT == Res.getValueType()) { + // The high bits are not guaranteed to be anything. Insert an extend. + if (N->getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(N->getOperand(0).getValueType())); + if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getZeroExtendInReg(Res, dl, + N->getOperand(0).getValueType().getScalarType()); + assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); + return Res; + } + } + + // Otherwise, just extend the original operand all the way to the larger type. + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + ISD::LoadExtType ExtType = + ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); + DebugLoc dl = N->getDebugLoc(); + SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + N->getPointerInfo(), + N->getMemoryVT(), N->isVolatile(), + N->isNonTemporal(), N->getAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +/// Promote the overflow flag of an overflowing arithmetic node. +SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { + // Simply change the return type of the boolean result. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + EVT ValueVTs[] = { N->getValueType(0), NVT }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + DAG.getVTList(ValueVTs, 2), Ops, 2); + + // Modified the sum result - switch anything that used the old sum to use + // the new one. + ReplaceValueWith(SDValue(N, 0), Res); + + return SDValue(Res.getNode(), 1); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // sign extension of its truncation to the original type. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. 
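+  // (Sketch for i8 SADDO promoted to i32: add the sign-extended operands as
+  // i32, then SIGN_EXTEND_INREG the sum from i8; if that differs from the raw
+  // i32 sum, the original i8 addition overflowed.)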
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: sign extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(OVT)); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { + SDValue Mask = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy)); + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), + LHS.getValueType(), Mask, LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(2)); + SDValue RHS = GetPromotedInteger(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { + EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + // Only use the result of getSetCCResultType if it is legal, + // otherwise just use the promoted result type (NVT). + if (!TLI.isTypeLegal(SVT)) + SVT = NVT; + + DebugLoc dl = N->getDebugLoc(); + assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && + "Vector compare must return a vector result!"); + + // Get the SETCC result using the canonical SETCC type. + SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); + + assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); + // Convert to the expected type. + return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { + SDValue Res = GetPromotedInteger(N->getOperand(0)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { + // The input may have strange things in the top bits of the registers, but + // these operations don't care. 
They may have weird bits going out, but + // that too is okay if they are integer operations. + SDValue LHS = GetPromotedInteger(N->getOperand(0)); + SDValue RHS = GetPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { + // The input value must be properly sign extended. + SDValue Res = SExtPromotedInteger(N->getOperand(0)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { + // The input value must be properly zero extended. + SDValue Res = ZExtPromotedInteger(N->getOperand(0)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res; + SDValue InOp = N->getOperand(0); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InOp.getValueType())) { + default: llvm_unreachable("Unknown type action!"); + case TargetLowering::TypeLegal: + case TargetLowering::TypeExpandInteger: + Res = InOp; + break; + case TargetLowering::TypePromoteInteger: + Res = GetPromotedInteger(InOp); + break; + case TargetLowering::TypeSplitVector: + EVT InVT = InOp.getValueType(); + assert(InVT.isVector() && "Cannot split scalar types"); + unsigned NumElts = InVT.getVectorNumElements(); + assert(NumElts == NVT.getVectorNumElements() && + "Dst and Src must have the same number of elements"); + assert(isPowerOf2_32(NumElts) && + "Promoted vector type must be a power of two"); + + SDValue EOp1, EOp2; + GetSplitVector(InOp, EOp1, EOp2); + + EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), + NumElts/2); + EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); + EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2); + } + + // Truncate to NVT instead of VT + return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // zero extension of its truncation to the original type. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: zero extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + // Promote the overflow bit trivially. 
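+  // (ResNo 1 is the boolean overflow result; PromoteIntRes_Overflow simply
+  // widens its type without touching the multiplication itself.)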
+ if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + EVT SmallVT = LHS.getValueType(); + + // To determine if the result overflowed in a larger type, we extend the + // input to the larger type, do the multiply (checking if it overflows), + // then also check the high bits of the result to see if overflow happened + // there. + if (N->getOpcode() == ISD::SMULO) { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + } else { + LHS = ZExtPromotedInteger(LHS); + RHS = ZExtPromotedInteger(RHS); + } + SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1)); + SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS); + + // Overflow occurred if it occurred in the larger type, or if the high part + // of the result does not zero/sign-extend the low part. Check this second + // possibility first. + SDValue Overflow; + if (N->getOpcode() == ISD::UMULO) { + // Unsigned overflow occurred if the high part is non-zero. + SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, + DAG.getIntPtrConstant(SmallVT.getSizeInBits())); + Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, + DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); + } else { + // Signed overflow occurred if the high part does not sign extend the low. + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), + Mul, DAG.getValueType(SmallVT)); + Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE); + } + + // The only other way for overflow to occur is if the multiplication in the + // larger type itself overflowed. + Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow, + SDValue(Mul.getNode(), 1)); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Overflow); + return Mul; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); + // The argument is passed as NumRegs registers of type RegVT. + + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); + Chain = Parts[i].getValue(1); + } + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::reverse(Parts.begin(), Parts.end()); + + // Assemble the parts in the promoted type. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]); + for (unsigned i = 1; i < NumRegs; ++i) { + SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); + // Shift it to the right position and "or" it in. 
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part, + DAG.getConstant(i * RegVT.getSizeInBits(), + TLI.getPointerTy())); + Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); + } + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); + + return Res; +} + +//===----------------------------------------------------------------------===// +// Integer Operand Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need promotion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + llvm_unreachable("Do not know how to promote this operator's operand!"); + + case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; + case ISD::ATOMIC_STORE: + Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N)); + break; + case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break; + case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; + case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; + case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; + case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntOp_CONVERT_RNDSAT(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; + case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::VSELECT: + case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; + case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; + case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; + case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; + case ISD::FP16_TO_FP32: + case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// PromoteSetCCOperands - Promote the operands of a comparison. 
This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, + ISD::CondCode CCCode) { + // We have to insert explicit sign or zero extends. Note that we could + // insert sign extends for ALL conditions, but zero extend is cheaper on + // many machines (an AND instead of two shifts), so prefer it. + switch (CCCode) { + default: llvm_unreachable("Unknown integer comparison!"); + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGE: + case ISD::SETUGT: + case ISD::SETULE: + case ISD::SETULT: + // ALL of these operations will work if we either sign or zero extend + // the operands (including the unsigned comparisons!). Zero extend is + // usually a simpler/cheaper operation, so prefer it. + NewLHS = ZExtPromotedInteger(NewLHS); + NewRHS = ZExtPromotedInteger(NewRHS); + break; + case ISD::SETGE: + case ISD::SETGT: + case ISD::SETLT: + case ISD::SETLE: + NewLHS = SExtPromotedInteger(NewLHS); + NewRHS = SExtPromotedInteger(NewRHS); + break; + } +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + return DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), + N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), + N->getOrdering(), N->getSynchScope()); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) { + // This should only occur in unusual situations like bitcasting to an + // x86_fp80, so just turn it into a store+load + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 2 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get()); + + // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always + // legal types. + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)), + 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "only know how to promote condition"); + + // Promote all the way up to the canonical SetCC type. + EVT SVT = TLI.getSetCCResultType(MVT::Other); + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); + + // The chain (Op#0) and basic block destination (Op#2) are always legal types. + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, + N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { + // Since the result type is legal, the operands must promote to it. + EVT OVT = N->getOperand(0).getValueType(); + SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); + SDValue Hi = GetPromotedInteger(N->getOperand(1)); + assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); + DebugLoc dl = N->getDebugLoc(); + + Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, + DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type is not. 
This implies + // that the vector is a power-of-two in length and that the element + // type does not have a strange size (eg: it is not i1). + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + + // Promote the inserted value. The type does not need to match the + // vector element type. Check that any extra bits introduced will be + // truncated away. + assert(N->getOperand(0).getValueType().getSizeInBits() >= + N->getValueType(0).getVectorElementType().getSizeInBits() && + "Type of inserted value narrower than vector element type!"); + + SmallVector<SDValue, 16> NewOps; + for (unsigned i = 0; i < NumElts; ++i) + NewOps.push_back(GetPromotedInteger(N->getOperand(i))); + + return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && + "can only promote integer arguments"); + SDValue InOp = GetPromotedInteger(N->getOperand(0)); + return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp, + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, + unsigned OpNo) { + if (OpNo == 1) { + // Promote the inserted value. This is valid because the type does not + // have to match the vector element type. + + // Check that any extra bits introduced will be truncated away. + assert(N->getOperand(1).getValueType().getSizeInBits() >= + N->getValueType(0).getVectorElementType().getSizeInBits() && + "Type of inserted value narrower than vector element type!"); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + GetPromotedInteger(N->getOperand(1)), + N->getOperand(2)), + 0); + } + + assert(OpNo == 2 && "Different operand and result vector types?"); + + // Promote the index. + SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), Idx), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { + SDValue NewOps[6]; + DebugLoc dl = N->getDebugLoc(); + NewOps[0] = N->getOperand(0); + for (unsigned i = 1; i < array_lengthof(NewOps); ++i) { + SDValue Flag = GetPromotedInteger(N->getOperand(i)); + NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); + } + return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { + // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote + // the operand in place. + return SDValue(DAG.UpdateNodeOperands(N, + GetPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Only know how to promote the condition!"); + SDValue Cond = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + EVT SVT = TLI.getSetCCResultType(N->getOpcode() == ISD::SELECT ? 
+ OpTy.getScalarType() : OpTy); + Cond = PromoteTargetBoolean(Cond, SVT); + + return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), + N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get()); + + // The CC (#4) and the possible return values (#2 and #3) have legal types. + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get()); + + // The CC (#2) is always legal. + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + DebugLoc dl = N->getDebugLoc(); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), + Op, DAG.getValueType(N->getOperand(0).getValueType())); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. + + // Truncate the value and store the result. + return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(), + N->getMemoryVT(), + isVolatile, isNonTemporal, Alignment); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue Op = GetPromotedInteger(N->getOperand(0)); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getZeroExtendInReg(Op, dl, + N->getOperand(0).getValueType().getScalarType()); +} + + +//===----------------------------------------------------------------------===// +// Integer Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion, we just +/// know that (at least) one result needs expansion. 
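+/// For example, on a 32-bit target an illegal i64 add is expanded here into
+/// two i32 halves; when the target supports the carry nodes this is roughly
+///   Lo = ADDC(LHSLo, RHSLo)         (also produces the carry)
+///   Hi = ADDE(LHSHi, RHSHi, carry)
+/// (Illustrative sketch only; see ExpandIntRes_ADDSUB below for the exact
+/// sequence emitted.)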
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand the result of this operator!"); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; + case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; + case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; + case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; + case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; + case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; + case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break; + case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break; + case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break; + case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; + case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break; + + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: { + std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); + SplitInteger(Tmp.first, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Tmp.second); + break; + } + + case ISD::AND: + case ISD::OR: + case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; + + case ISD::ADD: + case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break; + + case ISD::ADDC: + case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break; + + case ISD::ADDE: + case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + + case ISD::SADDO: + case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; + case ISD::UADDO: + case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + case 
ISD::UMULO: + case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetExpandedInteger(SDValue(N, ResNo), Lo, Hi); +} + +/// Lower an atomic node to the appropriate builtin call. +std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC; + + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic intrinsic Expand!"); + case ISD::ATOMIC_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; + case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; + case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; + case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + } + break; + case ISD::ATOMIC_CMP_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; + case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; + case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; + case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + } + break; + case ISD::ATOMIC_LOAD_ADD: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + } + break; + case ISD::ATOMIC_LOAD_SUB: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + } + break; + case ISD::ATOMIC_LOAD_AND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + } + break; + case ISD::ATOMIC_LOAD_OR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + } + break; + case ISD::ATOMIC_LOAD_XOR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + } + break; + case ISD::ATOMIC_LOAD_NAND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + } + break; + } + + return 
ExpandChainLibCall(LC, Node, false); +} + +/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded, +/// and the shift amount is a constant 'Amt'. Expand the operation. +void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi) { + DebugLoc DL = N->getDebugLoc(); + // Expand the incoming operand to be shifted, so that we have its parts + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + EVT NVT = InL.getValueType(); + unsigned VTBits = N->getValueType(0).getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + EVT ShTy = N->getOperand(1).getValueType(); + + if (N->getOpcode() == ISD::SHL) { + if (Amt > VTBits) { + Lo = Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getNode(ISD::SHL, DL, + NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy)); + } else if (Amt == NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = InL; + } else if (Amt == 1 && + TLI.isOperationLegalOrCustom(ISD::ADDC, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { + // Emit this X << 1 as X+X. + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); + SDValue LoOps[2] = { InL, InL }; + Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2); + SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; + Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getConstant(NVTBits-Amt, ShTy))); + } + return; + } + + if (N->getOpcode() == ISD::SRL) { + if (Amt > VTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRL, DL, + NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); + Hi = DAG.getConstant(0, NVT); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getConstant(0, NVT); + } else { + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); + } + return; + } + + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + if (Amt > VTBits) { + Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(Amt-NVTBits, ShTy)); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else { + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); + } +} + +/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify +/// this shift based on knowledge of the high bit of the shift amount. If we +/// can tell this, we know that it is >= 32 or < 32, without knowing the actual +/// shift amount. 
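+/// For example, with an i64 shift expanded into i32 halves, an amount known
+/// to have bit 5 set (e.g. (x | 32)) must be >= 32, so for SHL the low half
+/// is simply zero and the high half is the low input shifted by the masked
+/// amount; an amount known to have its high bits clear (e.g. (x & 31)) must
+/// be < 32 and is handled with a few ordinary shifts plus an OR.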
+bool DAGTypeLegalizer:: +ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); + unsigned ShBits = ShTy.getScalarType().getSizeInBits(); + unsigned NVTBits = NVT.getScalarType().getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne); + + // If we don't know anything about the high bits, exit. + if (((KnownZero|KnownOne) & HighBitMask) == 0) + return false; + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + // If we know that any of the high bits of the shift amount are one, then we + // can do this as a couple of simple shifts. + if (KnownOne.intersects(HighBitMask)) { + // Mask out the high bit, which we know is set. + Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, + DAG.getConstant(~HighBitMask, ShTy)); + + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: + Lo = DAG.getConstant(0, NVT); // Low part is zero. + Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + return true; + case ISD::SRL: + Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + } + } + + // If we know that all of the high bits of the shift amount are zero, then we + // can do this as a couple of simple shifts. + if ((KnownZero & HighBitMask) == HighBitMask) { + // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined + // shift if x is zero. We can use XOR here because x is known to be smaller + // than 32. + SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, + DAG.getConstant(NVTBits-1, ShTy)); + + unsigned Op1, Op2; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; + case ISD::SRL: + case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; + } + + // When shifting right the arithmetic for Lo and Hi is swapped. + if (N->getOpcode() != ISD::SHL) + std::swap(InL, InH); + + // Use a little trick to get the bits that move from Lo to Hi. First + // shift by one bit. + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy)); + // Then compute the remaining shift with amount-1. + SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); + + Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2); + + if (N->getOpcode() != ISD::SHL) + std::swap(Hi, Lo); + return true; + } + + return false; +} + +/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift +/// of any size. 
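+/// Both the "short" (Amt < NVTBits) and "long" (Amt >= NVTBits) result
+/// sequences are built unconditionally and the final halves are chosen with
+/// SELECTs on a single (Amt < NVTBits) comparison.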
+bool DAGTypeLegalizer:: +ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); + SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); + SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); + + SDValue LoS, HiS, LoL, HiL; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: + // Short: ShAmt < NVTBits + LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + HiS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); + + // Long: ShAmt >= NVTBits + LoL = DAG.getConstant(0, NVT); // Lo part is zero. + HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + return true; + case ISD::SRL: + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getConstant(0, NVT); // Hi part is zero. + LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + return true; + case ISD::SRA: + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. + DAG.getConstant(NVTBits-1, ShTy)); + LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + return true; + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + + EVT NVT = LHSL.getValueType(); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support + // them. TODO: Teach operation legalization how to expand unsupported + // ADDC/ADDE/SUBC/SUBE. 
The problem is that these operations generate + // a carry of type MVT::Glue, but there doesn't seem to be any way to + // generate a value of this type in the expanded code sequence. + bool hasCarry = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? + ISD::ADDC : ISD::SUBC, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + + if (hasCarry) { + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + return; + } + + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + } else { + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + SDValue Cmp = + DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + if (N->getOpcode() == ISD::ADDC) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + SDValue Res = DisintegrateMERGE_VALUES(N, ResNo); + SplitInteger(Res, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is any extension of the input (which degenerates to a copy). + Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op); + Hi = DAG.getUNDEF(NVT); // The high part is undefined. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part replicates the sign bit of Lo, make it explicit. + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(NVTBits-1, TLI.getPointerTy())); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part must be zero, make it explicit. + Hi = DAG.getConstant(0, NVT); + } +} + +void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NBitWidth = NVT.getSizeInBits(); + const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // ctlz (HiLo) -> Hi != 0 ? 
ctlz(Hi) : (ctlz(Lo)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + + SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, + DAG.getConstant(0, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), + DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + + SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, + DAG.getConstant(0, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, + SDValue &Lo, SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + ISD::LoadExtType ExtType = N->getExtensionType(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); + bool isInvariant = N->isInvariant(); + DebugLoc dl = N->getDebugLoc(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + EVT MemVT = N->getMemoryVT(); + + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), + MemVT, isVolatile, isNonTemporal, Alignment); + + // Remember the chain. + Ch = Lo.getValue(1); + + if (ExtType == ISD::SEXTLOAD) { + // The high part is obtained by SRA'ing all but one of the bits of the + // lo part. 
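+      // (e.g. with i32 parts, Hi = Lo >> 31 replicates the sign bit of the
+      // loaded value across the entire high half.)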
+ unsigned LoSize = Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else if (ExtType == ISD::ZEXTLOAD) { + // The high part is just a zero. + Hi = DAG.getConstant(0, NVT); + } else { + assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); + // The high part is undefined. + Hi = DAG.getUNDEF(NVT); + } + } else if (TLI.isLittleEndian()) { + // Little-endian - low bits are at low addresses. + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), + isVolatile, isNonTemporal, isInvariant, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), NEVT, + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + } else { + // Big-endian - high bits are at low addresses. Favor aligned loads at + // the cost of some bit-fiddling. + EVT MemVT = N->getMemoryVT(); + unsigned EBytes = MemVT.getStoreSize(); + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + + // Load both the high bits and maybe some of the low bits. + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), + EVT::getIntegerVT(*DAG.getContext(), + MemVT.getSizeInBits() - ExcessBits), + isVolatile, isNonTemporal, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Load the rest of the low bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer low bits from the bottom of Hi to the top of Lo. + Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + // Move high bits to the right position in Hi. + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, + NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + } + } + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
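+  // (In the two-load cases above, both partial loads were joined by a
+  // TokenFactor, so users of the original load's chain now depend on both.)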
+ ReplaceValueWith(SDValue(N, 1), Ch); +} + +void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH); +} + +void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + DebugLoc dl = N->getDebugLoc(); + + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = NVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + return; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHS) { + // We can emit a mulhs+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL); + return; + } + } + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(NVT, NVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + } + + // If nothing else, we can make a libcall. 
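+  // (e.g. an i64 multiply on a 32-bit target with none of the MULH/MUL_LOHI
+  // nodes available becomes a call to the MUL_I64 runtime routine.)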
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (VT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (VT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (VT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + DebugLoc dl = Node->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Compute the overflow. + // + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + EVT OType = Node->getValueType(1); + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(Node, 1), Cmp); +} + +void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::SDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::SDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::SDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::SDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // If we can emit an efficient shift operation, do so now. Check to see if + // the RHS is a constant. + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi); + + // If we can determine that the high bit of the shift is zero or one, even if + // the low bits are variable, emit this shift in an optimized form. + if (ExpandShiftWithKnownAmountBit(N, Lo, Hi)) + return; + + // If this target supports shift_PARTS, use it. First, map to the _PARTS opc. + unsigned PartsOpc; + if (N->getOpcode() == ISD::SHL) { + PartsOpc = ISD::SHL_PARTS; + } else if (N->getOpcode() == ISD::SRL) { + PartsOpc = ISD::SRL_PARTS; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + PartsOpc = ISD::SRA_PARTS; + } + + // Next check to see if the target supports this SHL_PARTS operation or if it + // will custom expand it. 
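+  // (The *_PARTS nodes take both halves plus the shift amount and produce
+  // both result halves at once, so targets can emit their native
+  // double-register shift sequence.)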
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); + if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || + Action == TargetLowering::Custom) { + // Expand the subcomponents. + SDValue LHSL, LHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + EVT VT = LHSL.getValueType(); + + // If the shift amount operand is coming from a vector legalization it may + // have an illegal type. Fix that first by casting the operand, otherwise + // the new SHL_PARTS operation would need further legalization. + SDValue ShiftOp = N->getOperand(1); + EVT ShiftTy = TLI.getShiftAmountTy(VT); + assert(ShiftTy.getScalarType().getSizeInBits() >= + Log2_32_Ceil(VT.getScalarType().getSizeInBits()) && + "ShiftAmountTy is too small to cover the range of this type!"); + if (ShiftOp.getValueType() != ShiftTy) + ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); + + SDValue Ops[] = { LHSL, LHSH, ShiftOp }; + Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); + Hi = Lo.getValue(1); + return; + } + + // Otherwise, emit a libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + bool isSigned; + if (N->getOpcode() == ISD::SHL) { + isSigned = false; /*sign irrelevant*/ + if (VT == MVT::i16) + LC = RTLIB::SHL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SHL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SHL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SHL_I128; + } else if (N->getOpcode() == ISD::SRL) { + isSigned = false; + if (VT == MVT::i16) + LC = RTLIB::SRL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRL_I128; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + isSigned = true; + if (VT == MVT::i16) + LC = RTLIB::SRA_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRA_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRA_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRA_I128; + } + + if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + return; + } + + if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi)) + llvm_unreachable("Unsupported shift!"); +} + +void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is sign extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0)); + // The high part is obtained by SRA'ing all but one of the bits of low part. + unsigned LoSize = NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. 
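+    // (e.g. sign-extending i48 to i64 with i32 parts: ExcessBits is
+    // 48 - 32 = 16, so after the split only the low 16 bits of Hi hold real
+    // data and are sign-extended in-register below to fill the high half.)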
+ SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); + } +} + +void DAGTypeLegalizer:: +ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + + if (EVT.bitsLE(Lo.getValueType())) { + // sext_inreg the low part if needed. + Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo, + N->getOperand(1)); + + // The high part gets the sign extension from the lo-part. This handles + // things like sextinreg V:i64 from i8. + Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, + DAG.getConstant(Hi.getValueType().getSizeInBits()-1, + TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. Leave the low part alone, + // sext_inreg the high part. + unsigned ExcessBits = + EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::SREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::SRL, dl, + N->getOperand(0).getValueType(), N->getOperand(0), + DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, + N->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); +} + +void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // A divide for UMULO should be faster than a function call. 
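+  // (The product is checked by dividing it back out: with RHS != 0,
+  // (LHS * RHS) / RHS != LHS exactly when the truncated product overflowed;
+  // the RHS == 0 case is forced to "no overflow" by the selects below.)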
+ if (N->getOpcode() == ISD::UMULO) { + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + + SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); + SplitInteger(MUL, Lo, Hi); + + // A divide for UMULO will be faster than a function call. Select to + // make sure we aren't using 0. + SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + RHS, DAG.getConstant(0, VT), ISD::SETEQ); + SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, + DAG.getConstant(1, VT), RHS); + SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); + SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, + ISD::SETNE); + Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero, + DAG.getConstant(0, N->getValueType(1)), + Overflow); + ReplaceValueWith(SDValue(N, 1), Overflow); + return; + } + + Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = TLI.getPointerTy(); + Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); + + // Replace this with a libcall that will check overflow. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::MULO_I32; + else if (VT == MVT::i64) + LC = RTLIB::MULO_I64; + else if (VT == MVT::i128) + LC = RTLIB::MULO_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!"); + + SDValue Temp = DAG.CreateStackTemporary(PtrVT); + // Temporary for the overflow value, default it to zero. + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, + DAG.getConstant(0, PtrVT), Temp, + MachinePointerInfo(), false, false, 0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + EVT ArgVT = N->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = N->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + } + + // Also pass the address of the overflow check. + Entry.Node = Temp; + Entry.Ty = PtrTy->getPointerTo(); + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + + SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); + TargetLowering:: + CallLoweringInfo CLI(Chain, RetTy, true, false, false, false, + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Func, Args, DAG, dl); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + SplitInteger(CallInfo.first, Lo, Hi); + SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, + MachinePointerInfo(), false, false, false, 0); + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, + DAG.getConstant(0, PtrVT), + ISD::SETNE); + // Use the overflow from the libcall everywhere. 
+ ReplaceValueWith(SDValue(N, 1), Ofl); +} + +void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::UDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::UDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::UDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::UDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::UREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::UREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::UREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::UREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is zero extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + Hi = DAG.getConstant(0, NVT); // The high part is just a zero. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getZeroExtendInReg(Hi, dl, + EVT::getIntegerVT(*DAG.getContext(), + ExcessBits)); + } +} + +void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); + SDValue Zero = DAG.getConstant(0, VT); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, + N->getOperand(0), + N->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(N)->getMemOperand(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); + ReplaceValueWith(SDValue(N, 1), Swap.getValue(1)); +} + +//===----------------------------------------------------------------------===// +// Integer Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need expansion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. 
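+/// For example, a store of an illegal i64 value on a 32-bit target reaches
+/// this path: the stored value is the operand that needs expansion, and the
+/// store is rewritten in terms of its two i32 halves (see ExpandIntOp_STORE
+/// below).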
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; + case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; + case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; + case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + case ISD::RETURNADDR: + case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break; + + case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(NewLHS, LHSLo, LHSHi); + GetExpandedInteger(NewRHS, RHSLo, RHSHi); + + if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { + if (RHSLo == RHSHi) { + if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { + if (RHSCST->isAllOnesValue()) { + // Equality comparison to -1. + NewLHS = DAG.getNode(ISD::AND, dl, + LHSLo.getValueType(), LHSLo, LHSHi); + NewRHS = RHSLo; + return; + } + } + } + + NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo); + NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi); + NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS); + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + return; + } + + // If this is a comparison of the sign bit, just look at the top part. + // X > -1, x < 0 + if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS)) + if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0 + (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1 + NewLHS = LHSHi; + NewRHS = RHSHi; + return; + } + + // FIXME: This generated code sucks. 
+  ISD::CondCode LowCC;
+  switch (CCCode) {
+  default: llvm_unreachable("Unknown integer setcc!");
+  case ISD::SETLT:
+  case ISD::SETULT: LowCC = ISD::SETULT; break;
+  case ISD::SETGT:
+  case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+  case ISD::SETLE:
+  case ISD::SETULE: LowCC = ISD::SETULE; break;
+  case ISD::SETGE:
+  case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+  }
+
+  // Tmp1 = lo(op1) < lo(op2)   // Always unsigned comparison
+  // Tmp2 = hi(op1) < hi(op2)   // Signedness depends on operands
+  // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+  // NOTE: on targets without efficient SELECT of bools, we can always use
+  // this identity: (B1 ? B2 : B3) --> (B1 & B2) | (!B1 & B3)
+  TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes,
+                                                 true, NULL);
+  SDValue Tmp1, Tmp2;
+  Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+                           LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+  if (!Tmp1.getNode())
+    Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+                        LHSLo, RHSLo, LowCC);
+  Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                           LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+  if (!Tmp2.getNode())
+    Tmp2 = DAG.getNode(ISD::SETCC, dl,
+                       TLI.getSetCCResultType(LHSHi.getValueType()),
+                       LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+  ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+  ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+  if ((Tmp1C && Tmp1C->isNullValue()) ||
+      (Tmp2C && Tmp2C->isNullValue() &&
+       (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+        CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+      (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+       (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+        CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+    // The low part is known false: the result is just the high part.
+    // For LE / GE, if the high part is known false, ignore the low part.
+    // For LT / GT, if the high part is known true, ignore the low part.
+    NewLHS = Tmp2;
+    NewRHS = SDValue();
+    return;
+  }
+
+  NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                             LHSHi, RHSHi, ISD::SETEQ, false,
+                             DagCombineInfo, dl);
+  if (!NewLHS.getNode())
+    NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                          LHSHi, RHSHi, ISD::SETEQ);
+  NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+                       NewLHS, Tmp1, Tmp2);
+  NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                        DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                        N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+  // result against zero to select between true and false values.
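+  // (A null NewRHS means IntegerExpandSetCCOperands collapsed the comparison
+  // to a single boolean value, e.g. via the sign-bit or known-constant
+  // special cases above.)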
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        N->getOperand(2), N->getOperand(3),
+                                        DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If IntegerExpandSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+  // The value being shifted is legal, but the shift amount is too big.
+  // It follows that either the result of the shift is undefined, or the
+  // upper half of the shift amount is zero. Just use the lower half.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(1), Lo, Hi);
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant,
+  // which is problematic on 8/16-bit targets where that type is illegal.
+  // Just truncate the constant to a valid type.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT DstVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Don't know how to expand this SINT_TO_FP!");
+  return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  if (ISD::isNormalStore(N))
+    return ExpandOp_NormalStore(N, OpNo);
+
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+
+  EVT VT = N->getOperand(1).getValueType();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  bool isNonTemporal = N->isNonTemporal();
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Lo, Hi;
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  if (N->getMemoryVT().bitsLE(NVT)) {
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+    return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+                             N->getMemoryVT(), isVolatile, isNonTemporal,
+                             Alignment);
+  }
+
+  if (TLI.isLittleEndian()) {
+    // Little-endian - low bits are at low addresses.
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+
+    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+                      isVolatile, isNonTemporal, Alignment);
+
+    unsigned ExcessBits =
+      N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+    EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+    // Increment the pointer to the other half.
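+    // For example (illustrative): an i64 store expanded into two i32 halves
+    // puts Lo at Ptr and Hi at Ptr+4, where 4 is IncrementSize (32 bits / 8).
+    // The second store can only be assumed aligned to
+    // MinAlign(Alignment, IncrementSize), since only the original pointer's
+    // alignment is known.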
+    unsigned IncrementSize = NVT.getSizeInBits()/8;
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getIntPtrConstant(IncrementSize));
+    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+                           N->getPointerInfo().getWithOffset(IncrementSize),
+                           NEVT, isVolatile, isNonTemporal,
+                           MinAlign(Alignment, IncrementSize));
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+  }
+
+  // Big-endian - high bits are at low addresses. Favor aligned stores at
+  // the cost of some bit-fiddling.
+  GetExpandedInteger(N->getValue(), Lo, Hi);
+
+  EVT ExtVT = N->getMemoryVT();
+  unsigned EBytes = ExtVT.getStoreSize();
+  unsigned IncrementSize = NVT.getSizeInBits()/8;
+  unsigned ExcessBits = (EBytes - IncrementSize)*8;
+  EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+                               ExtVT.getSizeInBits() - ExcessBits);
+
+  if (ExcessBits < NVT.getSizeInBits()) {
+    // Transfer high bits from the top of Lo to the bottom of Hi.
+    Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+                     DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+                                     TLI.getPointerTy()));
+    Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+                     DAG.getNode(ISD::SRL, dl, NVT, Lo,
+                                 DAG.getConstant(ExcessBits,
+                                                 TLI.getPointerTy())));
+  }
+
+  // Store both the high bits and maybe some of the low bits.
+  Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+                         HiVT, isVolatile, isNonTemporal, Alignment);
+
+  // Increment the pointer to the other half.
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  // Store the lowest ExcessBits bits in the second half.
+  Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+                         N->getPointerInfo().getWithOffset(IncrementSize),
+                         EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+                         isVolatile, isNonTemporal,
+                         MinAlign(Alignment, IncrementSize));
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+  SDValue InL, InH;
+  GetExpandedInteger(N->getOperand(0), InL, InH);
+  // Just truncate the low part of the source.
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT SrcVT = Op.getValueType();
+  EVT DstVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // The following optimization is valid only if every value in SrcVT (when
+  // treated as signed) is representable in DstVT. Check that the mantissa
+  // size of DstVT is at least the number of bits in SrcVT minus one.
+  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT);
+  if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 &&
+      TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) ==
+        TargetLowering::Custom) {
+    // Do a signed conversion then adjust the result.
+    SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+    SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+    // The result of the signed conversion needs adjusting if the 'sign bit' of
+    // the incoming integer was set. To handle this, we dynamically test to see
+    // if it is set, and, if so, add a fudge factor.
+
+    const uint64_t F32TwoE32  = 0x4F800000ULL;
+    const uint64_t F32TwoE64  = 0x5F800000ULL;
+    const uint64_t F32TwoE128 = 0x7F800000ULL;
+
+    APInt FF(32, 0);
+    if (SrcVT == MVT::i32)
+      FF = APInt(32, F32TwoE32);
+    else if (SrcVT == MVT::i64)
+      FF = APInt(32, F32TwoE64);
+    else if (SrcVT == MVT::i128)
+      FF = APInt(32, F32TwoE128);
+    else
+      llvm_unreachable("Unsupported UINT_TO_FP!");
+
+    // Check whether the sign bit is set.
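+    // (The sign bit of the expanded value lives in Hi, so test Hi < 0 with a
+    // signed compare. Informally: reading an unsigned N-bit value u as signed
+    // yields u - 2^N when the top bit is set, so the signed conversion is
+    // corrected by adding back 2^N. For instance, F32TwoE32 above is the
+    // IEEE-754 single-precision bit pattern of 2^32.)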
+ SDValue Lo, Hi; + GetExpandedInteger(Op, Lo, Hi); + SDValue SignSet = DAG.getSetCC(dl, + TLI.getSetCCResultType(Hi.getValueType()), + Hi, DAG.getConstant(0, Hi.getValueType()), + ISD::SETLT); + + // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. + SDValue FudgePtr = DAG.getConstantPool( + ConstantInt::get(*DAG.getContext(), FF.zext(64)), + TLI.getPointerTy()); + + // Get a pointer to FF if the sign bit was set, or to 0 otherwise. + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Four = DAG.getIntPtrConstant(4); + if (TLI.isBigEndian()) std::swap(Zero, Four); + SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, + Zero, Four); + unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); + FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); + Alignment = std::min(Alignment, 4u); + + // Load the value out, extending it from f32 to the destination float type. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + FudgePtr, + MachinePointerInfo::getConstantPool(), + MVT::f32, + false, false, Alignment); + return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); + } + + // Otherwise, use a libcall. + RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this UINT_TO_FP!"); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + cast<AtomicSDNode>(N)->getMemoryVT(), + N->getOperand(0), + N->getOperand(1), N->getOperand(2), + cast<AtomicSDNode>(N)->getMemOperand(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getSynchScope()); + return Swap.getValue(1); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned OutNumElems = OutVT.getVectorNumElements(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + DebugLoc dl = N->getDebugLoc(); + SDValue BaseIdx = N->getOperand(1); + + SmallVector<SDValue, 8> Ops; + Ops.reserve(OutNumElems); + for (unsigned i = 0; i != OutNumElems; ++i) { + + // Extract the element from the original vector. + SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), + BaseIdx, DAG.getIntPtrConstant(i)); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InVT.getVectorElementType(), N->getOperand(0), Index); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext); + // Insert the converted element to the new vector. 
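+    // (ANY_EXTEND suffices here: in the promoted representation only the low
+    // bits of each lane, at the original element width, are meaningful, so
+    // the extension's high bits are intentionally left undefined.)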
+ Ops.push_back(Op); + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { + ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + NewMask.push_back(SV->getMaskElt(i)); + } + + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + SDValue V1 = GetPromotedInteger(N->getOperand(1)); + EVT OutVT = V0.getValueType(); + + return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned NumElems = N->getNumOperands(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + DebugLoc dl = N->getDebugLoc(); + + SmallVector<SDValue, 8> Ops; + Ops.reserve(NumElems); + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + Ops.push_back(Op); + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { + + DebugLoc dl = N->getDebugLoc(); + + assert(!N->getOperand(0).getValueType().isVector() && + "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0)); + + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + + EVT InElemTy = OutVT.getVectorElementType(); + EVT OutElemTy = NOutVT.getVectorElementType(); + + unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); + unsigned NumOutElem = NOutVT.getVectorNumElements(); + unsigned NumOperands = N->getNumOperands(); + assert(NumElem * NumOperands == NumOutElem && + "Unexpected number of elements"); + + // Take the elements from the first vector. 
+ SmallVector<SDValue, 8> Ops(NumOutElem); + for (unsigned i = 0; i < NumOperands; ++i) { + SDValue Op = N->getOperand(i); + for (unsigned j = 0; j < NumElem; ++j) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InElemTy, Op, DAG.getIntPtrConstant(j)); + Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + + EVT NOutVTElem = NOutVT.getVectorElementType(); + + DebugLoc dl = N->getDebugLoc(); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + + SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, + NOutVTElem, N->getOperand(1)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT, + V0, ConvElem, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + SDValue V1 = N->getOperand(1); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + V0->getValueType(0).getScalarType(), V0, V1); + + // EXTRACT_VECTOR_ELT can return types which are wider than the incoming + // element types. If this is the case then we need to expand the outgoing + // value and not truncate it. + return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + unsigned NumElems = N->getNumOperands(); + + EVT RetSclrTy = N->getValueType(0).getVectorElementType(); + + SmallVector<SDValue, 8> NewOps; + NewOps.reserve(NumElems); + + // For each incoming vector + for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) { + SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx)); + EVT SclrTy = Incoming->getValueType(0).getVectorElementType(); + unsigned NumElem = Incoming->getValueType(0).getVectorNumElements(); + + for (unsigned i=0; i<NumElem; ++i) { + // Extract element from incoming vector + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, + Incoming, DAG.getIntPtrConstant(i)); + SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); + NewOps.push_back(Tr); + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), + &NewOps[0], NewOps.size()); + } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp new file mode 100644 index 0000000..b6436bf --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -0,0 +1,1145 @@ +//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeTypes method. It transforms +// an arbitrary well-formed SelectionDAG to only consist of legal types. This +// is common code shared among the LegalizeTypes*.cpp files. 
+// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static cl::opt<bool> +EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); + +/// PerformExpensiveChecks - Do extensive, expensive, sanity checking. +void DAGTypeLegalizer::PerformExpensiveChecks() { + // If a node is not processed, then none of its values should be mapped by any + // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + + // If a node is processed, then each value with an illegal type must be mapped + // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + // Values with a legal type may be mapped by ReplacedValues, but not by any of + // the other maps. + + // Note that these invariants may not hold momentarily when processing a node: + // the node being processed may be put in a map before being marked Processed. + + // Note that it is possible to have nodes marked NewNode in the DAG. This can + // occur in two ways. Firstly, a node may be created during legalization but + // never passed to the legalization core. This is usually due to the implicit + // folding that occurs when using the DAG.getNode operators. Secondly, a new + // node may be passed to the legalization core, but when analyzed may morph + // into a different node, leaving the original node as a NewNode in the DAG. + // A node may morph if one of its operands changes during analysis. Whether + // it actually morphs or not depends on whether, after updating its operands, + // it is equivalent to an existing node: if so, it morphs into that existing + // node (CSE). An operand can change during analysis if the operand is a new + // node that morphs, or it is a processed value that was mapped to some other + // value (as recorded in ReplacedValues) in which case the operand is turned + // into that other value. If a node morphs then the node it morphed into will + // be used instead of it for legalization, however the original node continues + // to live on in the DAG. + // The conclusion is that though there may be nodes marked NewNode in the DAG, + // all uses of such nodes are also marked NewNode: the result is a fungus of + // NewNodes growing on top of the useful nodes, and perhaps using them, but + // not used by them. + + // If a value is mapped by ReplacedValues, then it must have no uses, except + // by nodes marked NewNode (see above). + + // The final node obtained by mapping by ReplacedValues is not marked NewNode. + // Note that ReplacedValues should be applied iteratively. + + // Note that the ReplacedValues map may also map deleted nodes (by iterating + // over the DAG we never dereference deleted nodes). This means that it may + // also map nodes marked NewNode if the deallocated memory was reallocated as + // another node, and that new node was not seen by the LegalizeTypes machinery + // (for example because it was created but not used). In general, we cannot + // distinguish between new nodes and deleted nodes. + SmallVector<SDNode*, 16> NewNodes; + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + // Remember nodes marked NewNode - they are subject to extra checking below. 
+    if (I->getNodeId() == NewNode)
+      NewNodes.push_back(I);
+
+    for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
+      SDValue Res(I, i);
+      bool Failed = false;
+
+      unsigned Mapped = 0;
+      if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+        Mapped |= 1;
+        // Check that remapped values are only used by nodes marked NewNode.
+        for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+             UI != UE; ++UI)
+          if (UI.getUse().getResNo() == i)
+            assert(UI->getNodeId() == NewNode &&
+                   "Remapped value has non-trivial use!");
+
+        // Check that the final result of applying ReplacedValues is not
+        // marked NewNode.
+        SDValue NewVal = ReplacedValues[Res];
+        DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+        while (I != ReplacedValues.end()) {
+          NewVal = I->second;
+          I = ReplacedValues.find(NewVal);
+        }
+        assert(NewVal.getNode()->getNodeId() != NewNode &&
+               "ReplacedValues maps to a new node!");
+      }
+      if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+        Mapped |= 2;
+      if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+        Mapped |= 4;
+      if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+        Mapped |= 8;
+      if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+        Mapped |= 16;
+      if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+        Mapped |= 32;
+      if (SplitVectors.find(Res) != SplitVectors.end())
+        Mapped |= 64;
+      if (WidenedVectors.find(Res) != WidenedVectors.end())
+        Mapped |= 128;
+
+      if (I->getNodeId() != Processed) {
+        // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+        // marked NewNode too, since a deleted node may have been reallocated as
+        // another node that has not been seen by the LegalizeTypes machinery.
+        if ((I->getNodeId() == NewNode && Mapped > 1) ||
+            (I->getNodeId() != NewNode && Mapped != 0)) {
+          dbgs() << "Unprocessed value in a map!";
+          Failed = true;
+        }
+      } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+        if (Mapped > 1) {
+          dbgs() << "Value with legal type was transformed!";
+          Failed = true;
+        }
+      } else {
+        if (Mapped == 0) {
+          dbgs() << "Processed value not in any map!";
+          Failed = true;
+        } else if (Mapped & (Mapped - 1)) {
+          dbgs() << "Value in multiple maps!";
+          Failed = true;
+        }
+      }
+
+      if (Failed) {
+        if (Mapped & 1)
+          dbgs() << " ReplacedValues";
+        if (Mapped & 2)
+          dbgs() << " PromotedIntegers";
+        if (Mapped & 4)
+          dbgs() << " SoftenedFloats";
+        if (Mapped & 8)
+          dbgs() << " ScalarizedVectors";
+        if (Mapped & 16)
+          dbgs() << " ExpandedIntegers";
+        if (Mapped & 32)
+          dbgs() << " ExpandedFloats";
+        if (Mapped & 64)
+          dbgs() << " SplitVectors";
+        if (Mapped & 128)
+          dbgs() << " WidenedVectors";
+        dbgs() << "\n";
+        llvm_unreachable(0);
+      }
+    }
+  }
+
+  // Check that NewNodes are only used by other NewNodes.
+  for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+    SDNode *N = NewNodes[i];
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI)
+      assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+  }
+}
+
+/// run - This is the main entry point for the type legalizer. This does a
+/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+  bool Changed = false;
+
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
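+  // (HandleSDNode exists for exactly this purpose: it holds its operand as a
+  // genuine use, so replace-all-uses updates retarget it, while staying
+  // outside the normal node lists and worklist machinery.)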
+ HandleSDNode Dummy(DAG.getRoot()); + Dummy.setNodeId(Unanalyzed); + + // The root of the dag may dangle to deleted nodes until the type legalizer is + // done. Set it to null to avoid confusion. + DAG.setRoot(SDValue()); + + // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess' + // (and remembering them) if they are leaves and assigning 'Unanalyzed' if + // non-leaves. + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + if (I->getNumOperands() == 0) { + I->setNodeId(ReadyToProcess); + Worklist.push_back(I); + } else { + I->setNodeId(Unanalyzed); + } + } + + // Now that we have a set of nodes to process, handle them all. + while (!Worklist.empty()) { +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + SDNode *N = Worklist.back(); + Worklist.pop_back(); + assert(N->getNodeId() == ReadyToProcess && + "Node should be ready if on worklist!"); + + if (IgnoreNodeResults(N)) + goto ScanOperands; + + // Scan the values produced by the node, checking to see if any result + // types are illegal. + for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { + EVT ResultVT = N->getValueType(i); + switch (getTypeAction(ResultVT)) { + case TargetLowering::TypeLegal: + break; + // The following calls must take care of *all* of the node's results, + // not just the illegal result they were passed (this includes results + // with a legal type). Results can be remapped using ReplaceValueWith, + // or their promoted/expanded/etc values registered in PromotedIntegers, + // ExpandedIntegers etc. + case TargetLowering::TypePromoteInteger: + PromoteIntegerResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeExpandInteger: + ExpandIntegerResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeSoftenFloat: + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeExpandFloat: + ExpandFloatResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeScalarizeVector: + ScalarizeVectorResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeSplitVector: + SplitVectorResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeWidenVector: + WidenVectorResult(N, i); + Changed = true; + goto NodeDone; + } + } + +ScanOperands: + // Scan the operand list for the node, handling any nodes with operands that + // are illegal. + { + unsigned NumOperands = N->getNumOperands(); + bool NeedsReanalyzing = false; + unsigned i; + for (i = 0; i != NumOperands; ++i) { + if (IgnoreNodeResults(N->getOperand(i).getNode())) + continue; + + EVT OpVT = N->getOperand(i).getValueType(); + switch (getTypeAction(OpVT)) { + case TargetLowering::TypeLegal: + continue; + // The following calls must either replace all of the node's results + // using ReplaceValueWith, and return "false"; or update the node's + // operands in place, and return "true". 
+ case TargetLowering::TypePromoteInteger: + NeedsReanalyzing = PromoteIntegerOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeExpandInteger: + NeedsReanalyzing = ExpandIntegerOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeSoftenFloat: + NeedsReanalyzing = SoftenFloatOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeExpandFloat: + NeedsReanalyzing = ExpandFloatOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeScalarizeVector: + NeedsReanalyzing = ScalarizeVectorOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeSplitVector: + NeedsReanalyzing = SplitVectorOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeWidenVector: + NeedsReanalyzing = WidenVectorOperand(N, i); + Changed = true; + break; + } + break; + } + + // The sub-method updated N in place. Check to see if any operands are new, + // and if so, mark them. If the node needs revisiting, don't add all users + // to the worklist etc. + if (NeedsReanalyzing) { + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(NewNode); + // Recompute the NodeId and correct processed operands, adding the node to + // the worklist if ready. + SDNode *M = AnalyzeNewNode(N); + if (M == N) + // The node didn't morph - nothing special to do, it will be revisited. + continue; + + // The node morphed - this is equivalent to legalizing by replacing every + // value of N with the corresponding value of M. So do that now. + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + // Replacing the value takes care of remapping the new value. + ReplaceValueWith(SDValue(N, i), SDValue(M, i)); + assert(N->getNodeId() == NewNode && "Unexpected node state!"); + // The node continues to live on as part of the NewNode fungus that + // grows on top of the useful nodes. Nothing more needs to be done + // with it - move on to the next node. + continue; + } + + if (i == NumOperands) { + DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n"); + } + } +NodeDone: + + // If we reach here, the node was processed, potentially creating new nodes. + // Mark it as processed and add its users to the worklist as appropriate. + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(Processed); + + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + int NodeId = User->getNodeId(); + + // This node has two options: it can either be a new node or its Node ID + // may be a count of the number of operands it has that are not ready. + if (NodeId > 0) { + User->setNodeId(NodeId-1); + + // If this was the last use it was waiting on, add it to the ready list. + if (NodeId-1 == ReadyToProcess) + Worklist.push_back(User); + continue; + } + + // If this is an unreachable new node, then ignore it. If it ever becomes + // reachable by being used by a newly created node then it will be handled + // by AnalyzeNewNode. + if (NodeId == NewNode) + continue; + + // Otherwise, this node is new: this is the first operand of it that + // became ready. Its new NodeId is the number of operands it has minus 1 + // (as this node is now processed). + assert(NodeId == Unanalyzed && "Unknown node ID!"); + User->setNodeId(User->getNumOperands() - 1); + + // If the node only has a single operand, it is now ready. 
+ if (User->getNumOperands() == 1) + Worklist.push_back(User); + } + } + +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + // If the root changed (e.g. it was a dead load) update the root. + DAG.setRoot(Dummy.getValue()); + + // Remove dead nodes. This is important to do for cleanliness but also before + // the checking loop below. Implicit folding by the DAG.getNode operators and + // node morphing can cause unreachable nodes to be around with their flags set + // to new. + DAG.RemoveDeadNodes(); + + // In a debug build, scan all the nodes to make sure we found them all. This + // ensures that there are no cycles and that everything got processed. +#ifndef NDEBUG + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + bool Failed = false; + + // Check that all result types are legal. + if (!IgnoreNodeResults(I)) + for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(I->getValueType(i))) { + dbgs() << "Result type " << i << " illegal!\n"; + Failed = true; + } + + // Check that all operand types are legal. + for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(I->getOperand(i).getNode()) && + !isTypeLegal(I->getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal!\n"; + Failed = true; + } + + if (I->getNodeId() != Processed) { + if (I->getNodeId() == NewNode) + dbgs() << "New node not analyzed?\n"; + else if (I->getNodeId() == Unanalyzed) + dbgs() << "Unanalyzed node not noticed?\n"; + else if (I->getNodeId() > 0) + dbgs() << "Operand not processed?\n"; + else if (I->getNodeId() == ReadyToProcess) + dbgs() << "Not added to worklist?\n"; + Failed = true; + } + + if (Failed) { + I->dump(&DAG); dbgs() << "\n"; + llvm_unreachable(0); + } + } +#endif + + return Changed; +} + +/// AnalyzeNewNode - The specified node is the root of a subtree of potentially +/// new nodes. Correct any processed operands (this may change the node) and +/// calculate the NodeId. If the node itself changes to a processed node, it +/// is not remapped - the caller needs to take care of this. +/// Returns the potentially changed node. +SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { + // If this was an existing node that is already done, we're done. + if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) + return N; + + // Remove any stale map entries. + ExpungeNode(N); + + // Okay, we know that this node is new. Recursively walk all of its operands + // to see if they are new also. The depth of this walk is bounded by the size + // of the new tree that was constructed (usually 2-3 nodes), so we don't worry + // about revisiting of nodes. + // + // As we walk the operands, keep track of the number of nodes that are + // processed. If non-zero, this will become the new nodeid of this node. + // Operands may morph when they are analyzed. If so, the node will be + // updated after all operands have been analyzed. Since this is rare, + // the code tries to minimize overhead in the non-morphing case. + + SmallVector<SDValue, 8> NewOps; + unsigned NumProcessed = 0; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue OrigOp = N->getOperand(i); + SDValue Op = OrigOp; + + AnalyzeNewValue(Op); // Op may morph. + + if (Op.getNode()->getNodeId() == Processed) + ++NumProcessed; + + if (!NewOps.empty()) { + // Some previous operand changed. Add this one to the list. 
+ NewOps.push_back(Op); + } else if (Op != OrigOp) { + // This is the first operand to change - add all operands so far. + NewOps.append(N->op_begin(), N->op_begin() + i); + NewOps.push_back(Op); + } + } + + // Some operands changed - update the node. + if (!NewOps.empty()) { + SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size()); + if (M != N) { + // The node morphed into a different node. Normally for this to happen + // the original node would have to be marked NewNode. However this can + // in theory momentarily not be the case while ReplaceValueWith is doing + // its stuff. Mark the original node NewNode to help sanity checking. + N->setNodeId(NewNode); + if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed) + // It morphed into a previously analyzed node - nothing more to do. + return M; + + // It morphed into a different new node. Do the equivalent of passing + // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need + // to remap the operands, since they are the same as the operands we + // remapped above. + N = M; + ExpungeNode(N); + } + } + + // Calculate the NodeId. + N->setNodeId(N->getNumOperands() - NumProcessed); + if (N->getNodeId() == ReadyToProcess) + Worklist.push_back(N); + + return N; +} + +/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed. +/// If the node changes to a processed node, then remap it. +void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { + Val.setNode(AnalyzeNewNode(Val.getNode())); + if (Val.getNode()->getNodeId() == Processed) + // We were passed a processed node, or it morphed into one - remap it. + RemapValue(Val); +} + +/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it. +/// This can occur when a node is deleted then reallocated as a new node - +/// the mapping in ReplacedValues applies to the deleted node, not the new +/// one. +/// The only map that can have a deleted node as a source is ReplacedValues. +/// Other maps can have deleted nodes as targets, but since their looked-up +/// values are always immediately remapped using RemapValue, resulting in a +/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue +/// always performs correct mappings. In order to keep the mapping correct, +/// ExpungeNode should be called on any new nodes *before* adding them as +/// either source or target to ReplacedValues (which typically means calling +/// Expunge when a new node is first seen, since it may no longer be marked +/// NewNode by the time it is added to ReplacedValues). +void DAGTypeLegalizer::ExpungeNode(SDNode *N) { + if (N->getNodeId() != NewNode) + return; + + // If N is not remapped by ReplacedValues then there is nothing to do. + unsigned i, e; + for (i = 0, e = N->getNumValues(); i != e; ++i) + if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end()) + break; + + if (i == e) + return; + + // Remove N from all maps - this is expensive but rare. 
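+  // (Note the asymmetry: N may appear as a *target* in any map, which the
+  // RemapValue calls below repair, but only ReplacedValues can have N as a
+  // source, so only ReplacedValues entries for N are erased at the end.)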
+ + for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(), + E = PromotedIntegers.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(), + E = SoftenedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(), + E = ScalarizedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(), + E = WidenedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){ + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(), + E = ReplacedValues.end(); I != E; ++I) + RemapValue(I->second); + + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + ReplacedValues.erase(SDValue(N, i)); +} + +/// RemapValue - If the specified value was already legalized to another value, +/// replace it by that value. +void DAGTypeLegalizer::RemapValue(SDValue &N) { + DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); + if (I != ReplacedValues.end()) { + // Use path compression to speed up future lookups if values get multiply + // replaced with other values. + RemapValue(I->second); + N = I->second; + assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!"); + } +} + +namespace { + /// NodeUpdateListener - This class is a DAGUpdateListener that listens for + /// updates to nodes and recomputes their ready state. + class NodeUpdateListener : public SelectionDAG::DAGUpdateListener { + DAGTypeLegalizer &DTL; + SmallSetVector<SDNode*, 16> &NodesToAnalyze; + public: + explicit NodeUpdateListener(DAGTypeLegalizer &dtl, + SmallSetVector<SDNode*, 16> &nta) + : SelectionDAG::DAGUpdateListener(dtl.getDAG()), + DTL(dtl), NodesToAnalyze(nta) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && + N->getNodeId() != DAGTypeLegalizer::Processed && + "Invalid node ID for RAUW deletion!"); + // It is possible, though rare, for the deleted node N to occur as a + // target in a map, so note the replacement N -> E in ReplacedValues. + assert(E && "Node not replaced?"); + DTL.NoteDeletion(N, E); + + // In theory the deleted node could also have been scheduled for analysis. + // So remove it from the set of nodes which will be analyzed. + NodesToAnalyze.remove(N); + + // In general nothing needs to be done for E, since it didn't change but + // only gained new uses. However N -> E was just added to ReplacedValues, + // and the result of a ReplacedValues mapping is not allowed to be marked + // NewNode. So if E is marked NewNode, then it needs to be analyzed. 
+      if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+        NodesToAnalyze.insert(E);
+    }
+
+    virtual void NodeUpdated(SDNode *N) {
+      // Node updates can mean pretty much anything. It is possible that an
+      // operand was set to something already processed (for example), in
+      // which case this node could become ready. Recompute its flags.
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW deletion!");
+      N->setNodeId(DAGTypeLegalizer::NewNode);
+      NodesToAnalyze.insert(N);
+    }
+  };
+}
+
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value. Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+  assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+  // If expansion produced new nodes, make sure they are properly marked.
+  ExpungeNode(From.getNode());
+  AnalyzeNewValue(To); // Expunges To.
+
+  // Anything that used the old node should now use the new one. Note that this
+  // can potentially cause recursive merging.
+  SmallSetVector<SDNode*, 16> NodesToAnalyze;
+  NodeUpdateListener NUL(*this, NodesToAnalyze);
+  do {
+    DAG.ReplaceAllUsesOfValueWith(From, To);
+
+    // The old node may still be present in a map like ExpandedIntegers or
+    // PromotedIntegers. Inform maps about the replacement.
+    ReplacedValues[From] = To;
+
+    // Process the list of nodes that need to be reanalyzed.
+    while (!NodesToAnalyze.empty()) {
+      SDNode *N = NodesToAnalyze.back();
+      NodesToAnalyze.pop_back();
+      if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+        // The node was analyzed while reanalyzing an earlier node - it is safe
+        // to skip. Note that this is not a morphing node - otherwise it would
+        // still be marked NewNode.
+        continue;
+
+      // Analyze the node's operands and recalculate the node ID.
+      SDNode *M = AnalyzeNewNode(N);
+      if (M != N) {
+        // The node morphed into a different node. Make everyone use the new
+        // node instead.
+        assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+        assert(N->getNumValues() == M->getNumValues() &&
+               "Node morphing changed the number of results!");
+        for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+          SDValue OldVal(N, i);
+          SDValue NewVal(M, i);
+          if (M->getNodeId() == Processed)
+            RemapValue(NewVal);
+          DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
+          // OldVal may be a target of the ReplacedValues map which was marked
+          // NewNode to force reanalysis because it was updated. Ensure that
+          // anything that ReplacedValues mapped to OldVal will now be mapped
+          // all the way to NewVal.
+          ReplacedValues[OldVal] = NewVal;
+        }
+        // The original node continues to exist in the DAG, marked NewNode.
+      }
+    }
+    // When recursively updating nodes with new nodes, it is possible for new
+    // uses of From to appear due to CSE. If this happens, replace the new
+    // uses of From with To.
+ } while (!From.use_empty()); +} + +void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted integer"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = PromotedIntegers[Op]; + assert(OpEntry.getNode() == 0 && "Node is already promoted!"); + OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); +} + +void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for softened float"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = SoftenedFloats[Op]; + assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); + OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); +} + +void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { + // Note that in some cases vector operation operands may be greater than + // the vector element type. For example BUILD_VECTOR of type <1 x i1> with + // a constant i8 operand. + assert(Result.getValueType().getSizeInBits() >= + Op.getValueType().getVectorElementType().getSizeInBits() && + "Invalid type for scalarized vector"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = ScalarizedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); + OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); +} + +void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, + SDValue Hi) { + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded integer"); + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + assert(Entry.first.getNode() == 0 && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); +} + +void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, + SDValue Hi) { + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded float"); + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. 
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + assert(Entry.first.getNode() == 0 && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); +} + +void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't split"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, + SDValue Hi) { + assert(Lo.getValueType().getVectorElementType() == + Op.getValueType().getVectorElementType() && + 2*Lo.getValueType().getVectorNumElements() == + Op.getValueType().getVectorNumElements() && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for split vector"); + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + assert(Entry.first.getNode() == 0 && "Node already split"); + Entry.first = Lo; + Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); +} + +void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for widened vector"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = WidenedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node already widened!"); + OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); +} + + +//===----------------------------------------------------------------------===// +// Utilities. +//===----------------------------------------------------------------------===// + +/// BitConvertToInteger - Convert to an integer of the same size. +SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { + unsigned BitWidth = Op.getValueType().getSizeInBits(); + return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), + EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); +} + +/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the +/// same size. +SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { + assert(Op.getValueType().isVector() && "Only applies to vectors!"); + unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); + EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); + unsigned NumElts = Op.getValueType().getVectorNumElements(); + return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), + EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); +} + +SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, + EVT DestVT) { + DebugLoc dl = Op.getDebugLoc(); + // Create the stack frame object. Make sure it is aligned for both + // the source and destination types. + SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, + MachinePointerInfo(), false, false, 0); + // Result is a load from the stack slot. 
+  return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+                     false, false, false, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// If LegalizeResult is true, we are dealing with a node that has illegal
+/// result types, and VT is the type of the illegal result. If LegalizeResult
+/// is false, the node has legal result types but an illegal operand, and VT
+/// is the type of that illegal operand.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+  // See if the target wants to custom lower this node.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  if (LegalizeResult)
+    TLI.ReplaceNodeResults(N, Results, DAG);
+  else
+    TLI.LowerOperationWrapper(N, Results, DAG);
+
+  if (Results.empty())
+    // The target didn't want to custom lower it after all.
+    return false;
+
+  // Make everything that once used N's values now use those in Results instead.
+  assert(Results.size() == N->getNumValues() &&
+         "Custom lowering returned the wrong number of results!");
+  for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+    ReplaceValueWith(SDValue(N, i), Results[i]);
+    // Propagate node ordering
+    DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N));
+  }
+  return true;
+}
+
+
+/// CustomWidenLowerNode - Widen the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+  // See if the target wants to custom lower this node.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  TLI.ReplaceNodeResults(N, Results, DAG);
+
+  if (Results.empty())
+    // The target didn't want to custom widen lower its result after all.
+    return false;
+
+  // Update the widening map.
+  assert(Results.size() == N->getNumValues() &&
+         "Custom lowering returned the wrong number of results!");
+  for (unsigned i = 0, e = Results.size(); i != e; ++i)
+    SetWidenedVector(SDValue(N, i), Results[i]);
+  return true;
+}
+
+SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+    if (i != ResNo)
+      ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+  return SDValue(N->getOperand(ResNo));
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
+void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
+  // Currently all types are split in half.
+  if (!InVT.isVector()) {
+    LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+  } else {
+    unsigned NumElements = InVT.getVectorNumElements();
+    assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+    LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+                                   InVT.getVectorElementType(), NumElements/2);
+  }
+}
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Pair.getDebugLoc(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(1)); +} + +SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, + SDValue Index) { + DebugLoc dl = Index.getDebugLoc(); + // Make sure the index type is big enough to compute in. + if (Index.getValueType().bitsGT(TLI.getPointerTy())) + Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); + else + Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EltSize, Index.getValueType())); + return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); +} + +/// JoinIntegers - Build an integer with low bits Lo and high bits Hi. +SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { + // Arbitrarily use dlHi for result DebugLoc + DebugLoc dlHi = Hi.getDebugLoc(); + DebugLoc dlLo = Lo.getDebugLoc(); + EVT LVT = Lo.getValueType(); + EVT HVT = Hi.getValueType(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + LVT.getSizeInBits() + HVT.getSizeInBits()); + + Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); + Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, + DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); +} + +/// LibCallify - Convert the node into a libcall with the same prototype. +SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, + bool isSigned) { + unsigned NumOps = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + if (NumOps == 0) { + return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl); + } else if (NumOps == 1) { + SDValue Op = N->getOperand(0); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl); + } else if (NumOps == 2) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl); + } + SmallVector<SDValue, 8> Ops(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + Ops[i] = N->getOperand(i); + + return TLI.makeLibCall(DAG, LC, N->getValueType(0), + &Ops[0], NumOps, isSigned, dl); +} + +// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. 
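+// (Threading the in-chain matters for callers such as the ATOMIC_* expansions:
+// the libcall must stay ordered with surrounding memory operations, so the
+// node's chain operand becomes the chain input of the generated call, and the
+// call's out-chain is returned alongside its value.)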
+std::pair<SDValue, SDValue> +DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + return CallInfo; +} + +/// PromoteTargetBoolean - Promote the given target boolean to a target boolean +/// of the given type. A target boolean is an integer value, not necessarily of +/// type i1, the bits of which conform to getBooleanContents. +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { + DebugLoc dl = Bool.getDebugLoc(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector())); + return DAG.getNode(ExtendCode, dl, VT, Bool); +} + +/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT +/// bits in Hi. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + EVT LoVT, EVT HiVT, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Op.getDebugLoc(); + assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == + Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); + Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, + DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); +} + +/// SplitInteger - Return the lower and upper halves of Op's bits in a value +/// type half the size of Op's. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + SDValue &Lo, SDValue &Hi) { + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), + Op.getValueType().getSizeInBits()/2); + SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Entry Point +//===----------------------------------------------------------------------===// + +/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that +/// only uses types natively supported by the target. Returns "true" if it made +/// any changes. +/// +/// Note that this is an involved process that may invalidate pointers into +/// the graph. +bool SelectionDAG::LegalizeTypes() { + return DAGTypeLegalizer(*this).run(); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h new file mode 100644 index 0000000..54ea926 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -0,0 +1,750 @@ +//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the DAGTypeLegalizer class. This is a private interface +// shared between the code that implements the SelectionDAG::LegalizeTypes +// method. +// +//===----------------------------------------------------------------------===// + +#ifndef SELECTIONDAG_LEGALIZETYPES_H +#define SELECTIONDAG_LEGALIZETYPES_H + +#define DEBUG_TYPE "legalize-types" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks +/// on it until only value types the target machine can handle are left. This +/// involves promoting small sizes to large sizes or splitting up large values +/// into small values. +/// +class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { + const TargetLowering &TLI; + SelectionDAG &DAG; +public: + // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information + // about the state of the node. The enum has all the values. + enum NodeIdFlags { + /// ReadyToProcess - All operands have been processed, so this node is ready + /// to be handled. + ReadyToProcess = 0, + + /// NewNode - This is a new node, not before seen, that was created in the + /// process of legalizing some other node. + NewNode = -1, + + /// Unanalyzed - This node's ID needs to be set to the number of its + /// unprocessed operands. + Unanalyzed = -2, + + /// Processed - This is a node that has already been processed. + Processed = -3 + + // 1+ - This is a node which has this many unprocessed operands. + }; +private: + + /// ValueTypeActions - This is a bitvector that contains two bits for each + /// simple value type, where the two bits correspond to the LegalizeAction + /// enum from TargetLowering. This can be queried with "getTypeAction(VT)". + TargetLowering::ValueTypeActionImpl ValueTypeActions; + + /// getTypeAction - Return how we should legalize values of this type. + TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const { + return TLI.getTypeAction(*DAG.getContext(), VT); + } + + /// isTypeLegal - Return true if this type is legal on this target. + bool isTypeLegal(EVT VT) const { + return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; + } + + /// IgnoreNodeResults - Pretend all of this node's results are legal. + bool IgnoreNodeResults(SDNode *N) const { + return N->getOpcode() == ISD::TargetConstant; + } + + /// PromotedIntegers - For integer nodes that are below legal width, this map + /// indicates what promoted value to use. + SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers; + + /// ExpandedIntegers - For integer nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. + SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers; + + /// SoftenedFloats - For floating point nodes converted to integers of + /// the same size, this map indicates the converted value to use. + SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats; + + /// ExpandedFloats - For float nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. 
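+ /// (For example, a ppcf128 value maps to the pair of f64 values that make
+ /// it up.)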
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats; + + /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the + /// scalar value of type 'ty' to use. + SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors; + + /// SplitVectors - For nodes that need to be split this map indicates + /// which operands are the expanded version of the input. + SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors; + + /// WidenedVectors - For vector nodes that need to be widened, indicates + /// the widened value to use. + SmallDenseMap<SDValue, SDValue, 8> WidenedVectors; + + /// ReplacedValues - For values that have been replaced with another, + /// indicates the replacement value to use. + SmallDenseMap<SDValue, SDValue, 8> ReplacedValues; + + /// Worklist - This defines a worklist of nodes to process. In order to be + /// pushed onto this worklist, all operands of a node must have already been + /// processed. + SmallVector<SDNode*, 128> Worklist; + +public: + explicit DAGTypeLegalizer(SelectionDAG &dag) + : TLI(dag.getTargetLoweringInfo()), DAG(dag), + ValueTypeActions(TLI.getValueTypeActions()) { + assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && + "Too many value types for ValueTypeActions to hold!"); + } + + /// run - This is the main entry point for the type legalizer. This does a + /// top-down traversal of the dag, legalizing types as it goes. Returns + /// "true" if it made any changes. + bool run(); + + void NoteDeletion(SDNode *Old, SDNode *New) { + ExpungeNode(Old); + ExpungeNode(New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + ReplacedValues[SDValue(Old, i)] = SDValue(New, i); + } + + SelectionDAG &getDAG() const { return DAG; } + +private: + SDNode *AnalyzeNewNode(SDNode *N); + void AnalyzeNewValue(SDValue &Val); + void ExpungeNode(SDNode *N); + void PerformExpensiveChecks(); + void RemapValue(SDValue &N); + + // Common routines. + SDValue BitConvertToInteger(SDValue Op); + SDValue BitConvertVectorToIntegerVector(SDValue Op); + SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT); + bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult); + bool CustomWidenLowerNode(SDNode *N, EVT VT); + + /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES + /// node with the corresponding input operand, except for the result 'ResNo', + /// for which the corresponding input operand is returned. + SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); + + SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); + SDValue JoinIntegers(SDValue Lo, SDValue Hi); + SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); + + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); + + SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); + void ReplaceValueWith(SDValue From, SDValue To); + void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, + SDValue &Lo, SDValue &Hi); + + //===--------------------------------------------------------------------===// + // Integer Promotion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetPromotedInteger - Given a processed operand Op which was promoted to a + /// larger integer type, this returns the promoted value. 
The low bits of the + /// promoted value corresponding to the original type are exactly equal to Op. + /// The extra bits contain rubbish, so the promoted value may need to be zero- + /// or sign-extended from the original type before it is usable (the helpers + /// SExtPromotedInteger and ZExtPromotedInteger can do this for you). + /// For example, if Op is an i16 and was promoted to an i32, then this method + /// returns an i32, the lower 16 bits of which coincide with Op, and the upper + /// 16 bits of which contain rubbish. + SDValue GetPromotedInteger(SDValue Op) { + SDValue &PromotedOp = PromotedIntegers[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedInteger(SDValue Op, SDValue Result); + + /// SExtPromotedInteger - Get a promoted operand and sign extend it to the + /// final size. + SDValue SExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + } + + /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the + /// final size. + SDValue ZExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); + } + + // Integer Result Promotion. + void PromoteIntegerResult(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_AssertSext(SDNode *N); + SDValue PromoteIntRes_AssertZext(SDNode *N); + SDValue PromoteIntRes_Atomic0(AtomicSDNode *N); + SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); + SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); + SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); + SDValue PromoteIntRes_BITCAST(SDNode *N); + SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); + SDValue PromoteIntRes_Constant(SDNode *N); + SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntRes_CTLZ(SDNode *N); + SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTTZ(SDNode *N); + SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N); + SDValue PromoteIntRes_INT_EXTEND(SDNode *N); + SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_Overflow(SDNode *N); + SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SDIV(SDNode *N); + SDValue PromoteIntRes_SELECT(SDNode *N); + SDValue PromoteIntRes_VSELECT(SDNode *N); + SDValue PromoteIntRes_SELECT_CC(SDNode *N); + SDValue PromoteIntRes_SETCC(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N); + SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_TRUNCATE(SDNode *N); + SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_UDIV(SDNode *N); + SDValue PromoteIntRes_UNDEF(SDNode *N); + SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue 
PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + + // Integer Operand Promotion. + bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); + SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N); + SDValue PromoteIntOp_BITCAST(SDNode *N); + SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); + SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); + SDValue PromoteIntOp_MEMBARRIER(SDNode *N); + SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); + SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_TRUNCATE(SDNode *N); + SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + + void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); + + //===--------------------------------------------------------------------===// + // Integer Expansion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedInteger - Given a processed operand Op which was expanded into + /// two integers of half the size, this returns the two halves. The low bits + /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi. + /// For example, if Op is an i64 which was expanded into two i32's, then this + /// method returns the two i32's, with Lo being equal to the lower 32 bits of + /// Op, and Hi being equal to the upper 32 bits. + void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi); + + // Integer Result Expansion. 
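+ // (For example, an i64 ADD on a 32-bit target is typically expanded into an
+ // i32 ADDC/ADDE pair computing the low and high halves; see
+ // ExpandIntRes_ADDSUB below.)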
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo); + void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Integer Operand Expansion. + bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandIntOp_BITCAST(SDNode *N); + SDValue ExpandIntOp_BR_CC(SDNode *N); + SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); + SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue ExpandIntOp_SELECT_CC(SDNode *N); + SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_Shift(SDNode *N); + SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ExpandIntOp_TRUNCATE(SDNode *N); + SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_RETURNADDR(SDNode *N); + SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); + + void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float to Integer Conversion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. 
The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op. + SDValue GetSoftenedFloat(SDValue Op) { + SDValue &SoftenedOp = SoftenedFloats[Op]; + RemapValue(SoftenedOp); + assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); + return SoftenedOp; + } + void SetSoftenedFloat(SDValue Op, SDValue Result); + + // Result Float to Integer Conversion. + void SoftenFloatResult(SDNode *N, unsigned OpNo); + SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_BITCAST(SDNode *N); + SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); + SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCEIL(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FDIV(SDNode *N); + SDValue SoftenFloatRes_FEXP(SDNode *N); + SDValue SoftenFloatRes_FEXP2(SDNode *N); + SDValue SoftenFloatRes_FFLOOR(SDNode *N); + SDValue SoftenFloatRes_FLOG(SDNode *N); + SDValue SoftenFloatRes_FLOG2(SDNode *N); + SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMA(SDNode *N); + SDValue SoftenFloatRes_FMUL(SDNode *N); + SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); + SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N); + SDValue SoftenFloatRes_FP_ROUND(SDNode *N); + SDValue SoftenFloatRes_FPOW(SDNode *N); + SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREM(SDNode *N); + SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSQRT(SDNode *N); + SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTRUNC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_UNDEF(SDNode *N); + SDValue SoftenFloatRes_VAARG(SDNode *N); + SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + + // Operand Float to Integer Conversion. + bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_BITCAST(SDNode *N); + SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_ROUND(SDNode *N); + SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N); + SDValue SoftenFloatOp_SELECT_CC(SDNode *N); + SDValue SoftenFloatOp_SETCC(SDNode *N); + SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Float Expansion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedFloat - Given a processed operand Op which was expanded into + /// two floating point values of half the size, this returns the two halves. + /// The low bits of Op are exactly equal to the bits of Lo; the high bits + /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded + /// into two f64's, then this method returns the two f64's, with Lo being + /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits. 
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi); + + // Float Result Expansion. + void ExpandFloatResult(SDNode *N, unsigned ResNo); + void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Float Operand Expansion. + bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FP_ROUND(SDNode *N); + SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_SELECT_CC(SDNode *N); + SDValue ExpandFloatOp_SETCC(SDNode *N); + SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); + + void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Scalarization Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetScalarizedVector - Given a processed one-element vector Op which was + /// scalarized to its element type, this returns the element. For example, + /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32. + SDValue GetScalarizedVector(SDValue Op) { + SDValue &ScalarizedOp = ScalarizedVectors[Op]; + RemapValue(ScalarizedOp); + assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); + return ScalarizedOp; + } + void SetScalarizedVector(SDValue Op, SDValue Result); + + // Vector Result Scalarization: <1 x ty> -> ty. 
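+ // (For example, an FADD on <1 x f32> becomes an f32 FADD on the scalarized
+ // operands, and the scalar result is recorded with SetScalarizedVector.)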
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_TernaryOp(SDNode *N); + SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + SDValue ScalarizeVecRes_InregOp(SDNode *N); + + SDValue ScalarizeVecRes_BITCAST(SDNode *N); + SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); + SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); + SDValue ScalarizeVecRes_FPOWI(SDNode *N); + SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue ScalarizeVecRes_VSELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_SETCC(SDNode *N); + SDValue ScalarizeVecRes_UNDEF(SDNode *N); + SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_VSETCC(SDNode *N); + + // Vector Operand Scalarization: <1 x ty> -> ty. + bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_BITCAST(SDNode *N); + SDValue ScalarizeVecOp_EXTEND(SDNode *N); + SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); + SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Vector Splitting Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSplitVector - Given a processed vector Op which was split into vectors + /// of half the size, this method returns the halves. The first elements of + /// Op coincide with the elements of Lo; the remaining elements of Op coincide + /// with the elements of Hi: Op is what you would get by concatenating Lo and + /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then + /// this method returns the two v4i32's, with Lo corresponding to the first 4 + /// elements of Op, and Hi to the last 4 elements. + void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
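+ // (For example, a v8i32 ADD becomes two v4i32 ADDs, one on each half
+ // returned by GetSplitVector.)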
+ void SplitVectorResult(SDNode *N, unsigned OpNo); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); + + void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + SDValue &Hi); + + // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. + bool SplitVectorOperand(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_UnaryOp(SDNode *N); + + SDValue SplitVecOp_BITCAST(SDNode *N); + SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); + SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_VSETCC(SDNode *N); + SDValue SplitVecOp_FP_ROUND(SDNode *N); + + //===--------------------------------------------------------------------===// + // Vector Widening Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetWidenedVector - Given a processed vector Op which was widened into a + /// larger vector, this method returns the larger vector. The elements of + /// the returned vector consist of the elements of Op followed by elements + /// containing rubbish. For example, if Op is a v2i32 that was widened to a + /// v4i32, then this method returns a v4i32 for which the first two elements + /// are the same as those of Op, while the last two elements contain rubbish. + SDValue GetWidenedVector(SDValue Op) { + SDValue &WidenedOp = WidenedVectors[Op]; + RemapValue(WidenedOp); + assert(WidenedOp.getNode() && "Operand wasn't widened?"); + return WidenedOp; + } + void SetWidenedVector(SDValue Op, SDValue Result); + + // Widen Vector Result Promotion. 
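+ // (For example, a v3i32 operation on a target with v4i32 registers is
+ // performed as v4i32; the extra lane contains rubbish, as described for
+ // GetWidenedVector above.)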
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VSETCC(SDNode* N);
+
+ SDValue WidenVecRes_Ternary(SDNode *N);
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_POWI(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+ SDValue WidenVecRes_InregOp(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_SETCC(SDNode* N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GenWidenVectorLoads - Helper function to generate a set of loads to
+ /// load a vector with a resulting wider type. It takes
+ /// LdChain: list of chains for the loads to be generated.
+ /// Ld: load to widen
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD);
+
+ /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+ /// loads to load a vector with a resulting wider type. It takes
+ /// LdChain: list of chains for the loads to be generated.
+ /// Ld: load to widen
+ /// ExtType: extension element type
+ SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+ LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+ /// GenWidenVectorStores - Helper function to generate a set of stores to
+ /// store a widened vector into non-widened memory. It takes
+ /// StChain: list of chains for the stores we have generated
+ /// ST: store of a widened value
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST);
+
+ /// GenWidenVectorTruncStores - Helper function to generate a set of stores
+ /// to truncate and store a widened vector into non-widened memory. It takes
+ /// StChain: list of chains for the stores we have generated
+ /// ST: store of a widened value
+ void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+ StoreSDNode *ST);
+
+ /// ModifyToType - Modifies a vector input (widens or narrows) to a vector
+ /// of type WidenVT. The input vector must have the same element type as
+ /// WidenVT.
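+ /// (Conceptually, widening pads the input with undefined elements, while
+ /// narrowing keeps only the leading elements of the input.)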
+ SDValue ModifyToType(SDValue InOp, EVT WidenVT); + + + //===--------------------------------------------------------------------===// + // Generic Splitting: LegalizeTypesGeneric.cpp + //===--------------------------------------------------------------------===// + + // Legalization methods which only use that the illegal type is split into two + // not necessarily identical types. As such they can be used for splitting + // vectors and expanding integers and floats. + + void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) { + if (Op.getValueType().isVector()) + GetSplitVector(Op, Lo, Hi); + else if (Op.getValueType().isInteger()) + GetExpandedInteger(Op, Lo, Hi); + else + GetExpandedFloat(Op, Lo, Hi); + } + + /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type + /// which is split (or expanded) into two not necessarily identical pieces. + void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT); + + /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and + /// high parts of the given value. + void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi); + + // Generic Result Splitting. + void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); + void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); + + //===--------------------------------------------------------------------===// + // Generic Expansion: LegalizeTypesGeneric.cpp + //===--------------------------------------------------------------------===// + + // Legalization methods which only use that the illegal type is split into two + // identical types of half the size, and that the Lo/Hi part is stored first + // in memory on little/big-endian machines, followed by the Hi/Lo part. As + // such they can be used for expanding integers and floats. + + void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) { + if (Op.getValueType().isInteger()) + GetExpandedInteger(Op, Lo, Hi); + else + GetExpandedFloat(Op, Lo, Hi); + } + + // Generic Result Expansion. + void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); + void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi); + + // Generic Operand Expansion. + SDValue ExpandOp_BITCAST (SDNode *N); + SDValue ExpandOp_BUILD_VECTOR (SDNode *N); + SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); + SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); + SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N); + SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo); +}; + +} // end namespace llvm. + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp new file mode 100644 index 0000000..222d1c0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -0,0 +1,525 @@ +//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements generic type expansion and splitting for LegalizeTypes. +// The routines here perform legalization when the details of the type (such as +// whether it is an integer or a float) do not matter. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. The Lo/Hi part +// is required to be stored first in memory on little/big-endian machines. +// Splitting is the act of changing a computation in an illegal type to be a +// computation in two not necessarily identical registers of a smaller type. +// There are no requirements on how the type is represented in memory. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/IR/DataLayout.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Generic Result Expansion. +//===----------------------------------------------------------------------===// + +// These routines assume that the Lo/Hi part is stored first in memory on +// little/big-endian machines, followed by the Hi/Lo part. This means that +// they cannot be used as is on vectors, for which Lo is always stored first. +void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + GetExpandedOp(Op, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: + break; + case TargetLowering::TypeSoftenFloat: + // Convert the integer operand instead. + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + // Convert the expanded pieces of the input. + GetExpandedOp(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + case TargetLowering::TypeSplitVector: + GetSplitVector(InOp, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + case TargetLowering::TypeScalarizeVector: + // Convert the element instead. 
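+ // (For instance, a <1 x f64> operand is reduced to its f64 element, bitcast
+ // to an i64, and then split like any other integer.)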
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + case TargetLowering::TypeWidenVector: { + assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); + InOp = GetWidenedVector(InOp); + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + InVT.getVectorNumElements()/2); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + } + } + + if (InVT.isVector() && OutVT.isInteger()) { + // Handle cases like i64 = BITCAST v1i64 on x86, where the operand + // is legal but the result is not. + unsigned NumElems = 2; + EVT ElemVT = NOutVT; + EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + + // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>. + while (!isTypeLegal(NVT)) { + unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2; + // If the element size is smaller than byte, bail. + if (NewSizeInBits < 8) + break; + NumElems *= 2; + ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits); + NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + } + + if (isTypeLegal(NVT)) { + SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp); + + SmallVector<SDValue, 8> Vals; + for (unsigned i = 0; i < NumElems; ++i) + Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, + CastInOp, DAG.getIntPtrConstant(i))); + + // Build Lo, Hi pair by pairing extracted elements if needed. + unsigned Slot = 0; + for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) { + // Each iteration will BUILD_PAIR two nodes and append the result until + // there are only two nodes left, i.e. Lo and Hi. + SDValue LHS = Vals[Slot]; + SDValue RHS = Vals[Slot + 1]; + + if (TLI.isBigEndian()) + std::swap(LHS, RHS); + + Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, + EVT::getIntegerVT( + *DAG.getContext(), + LHS.getValueType().getSizeInBits() << 1), + LHS, RHS)); + } + Lo = Vals[Slot++]; + Hi = Vals[Slot++]; + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + return; + } + } + + // Lower the bit-convert to a store/load from the stack. + assert(NOutVT.isByteSized() && "Expanded type not byte sized!"); + + // Create the stack frame object. Make sure it is aligned for both + // the source and expanded destination types. + unsigned Alignment = + TLI.getDataLayout()->getPrefTypeAlignment(NOutVT. + getTypeForEVT(*DAG.getContext())); + SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, + false, false, 0); + + // Load the first half from the stack slot. + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + false, false, false, 0); + + // Increment the pointer to the other half. + unsigned IncrementSize = NOutVT.getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the second half from the stack slot. 
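+ // (Both halves are loaded with Store as their chain, so the two loads are
+ // independent of each other.)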
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), false,
+ false, false, MinAlign(Alignment, IncrementSize));
+
+ // Handle endianness of the load.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+ Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ EVT OldEltVT = OldVec.getValueType().getVectorElementType();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ EVT OldVT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+ if (OldVT != OldEltVT) {
+ // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+ // the input vector. If so, extend the elements of the input vector to the
+ // same bitwidth as the result before expanding.
+ assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, 2*OldElts),
+ OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ // Make sure the type of Idx is big enough to hold the new values.
+ if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine is only for normal loads!");
+ DebugLoc dl = N->getDebugLoc();
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ isVolatile, isNonTemporal, isInvariant, Alignment);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getLoad(NVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + +void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + const unsigned Align = N->getConstantOperandVal(3); + + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + + +//===--------------------------------------------------------------------===// +// Generic Operand Expansion. +//===--------------------------------------------------------------------===// + +SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0).isVector()) { + // An illegal expanding type is being converted to a legal vector type. + // Make a two element vector out of the expanded parts and convert that + // instead, but only if the new vector type is legal (otherwise there + // is no point, and it might create expansion loops). For example, on + // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), + TLI.getTypeToTransformTo(*DAG.getContext(), OVT), + 2); + + if (isTypeLegal(NVT)) { + SDValue Parts[2]; + GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]); + + if (TLI.isBigEndian()) + std::swap(Parts[0], Parts[1]); + + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); + } + } + + // Otherwise, store to a temporary and load out again as the new type. + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type needs expansion. + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + EVT OldVT = N->getOperand(0).getValueType(); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); + DebugLoc dl = N->getDebugLoc(); + + assert(OldVT == VecVT.getVectorElementType() && + "BUILD_VECTOR operand type doesn't match vector element type!"); + + // Build a vector of twice the length out of the expanded elements. + // For example <3 x i64> -> <6 x i32>. 
+ std::vector<SDValue> NewElts; + NewElts.reserve(NumElts*2); + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(i), Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + NewElts.push_back(Lo); + NewElts.push_back(Hi); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, + EVT::getVectorVT(*DAG.getContext(), + NewVT, NewElts.size()), + &NewElts[0], NewElts.size()); + + // Convert the new vector to the old vector type. + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(0), Lo, Hi); + return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo; +} + +SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { + // The vector type is legal but the element type needs expansion. + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Val = N->getOperand(1); + EVT OldEVT = Val.getValueType(); + EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT); + + assert(OldEVT == VecVT.getVectorElementType() && + "Inserted element type doesn't match vector element type!"); + + // Bitconvert to a vector of twice the length with elements of the expanded + // type, insert the expanded vector elements, and then convert back. + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, + NewVecVT, N->getOperand(0)); + + SDValue Lo, Hi; + GetExpandedOp(Val, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + SDValue Idx = N->getOperand(2); + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); + Idx = DAG.getNode(ISD::ADD, dl, + Idx.getValueType(), Idx, DAG.getIntPtrConstant(1)); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); + + // Convert the new vector to the old vector type. 
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && + "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + Ops[0] = N->getOperand(0); + SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); + for (unsigned i = 1; i < NumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { + assert(ISD::isNormalStore(N) && "This routine only for normal stores!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + DebugLoc dl = N->getDebugLoc(); + + StoreSDNode *St = cast<StoreSDNode>(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), + St->getValue().getValueType()); + SDValue Chain = St->getChain(); + SDValue Ptr = St->getBasePtr(); + unsigned Alignment = St->getAlignment(); + bool isVolatile = St->isVolatile(); + bool isNonTemporal = St->isNonTemporal(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + unsigned IncrementSize = NVT.getSizeInBits() / 8; + + SDValue Lo, Hi; + GetExpandedOp(St->getValue(), Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); + Hi = DAG.getStore(Chain, dl, Hi, Ptr, + St->getPointerInfo().getWithOffset(IncrementSize), + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===--------------------------------------------------------------------===// +// Generic Result Splitting. +//===--------------------------------------------------------------------===// + +// Be careful to make no assumptions about which of Lo/Hi is stored first in +// memory (for vectors it is always Lo first followed by Hi in the following +// bytes; for integers and floats it is Lo first if and only if the machine is +// little-endian). 
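+// (Hence, unlike the expansion routines above, the splitting routines below
+// never swap Lo and Hi to compensate for endianness.)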
+ +void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + GetSplitOp(Op, Lo, Hi); +} + +void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH, CL, CH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(1), LL, LH); + GetSplitOp(N->getOperand(2), RL, RH); + + SDValue Cond = N->getOperand(0); + CL = CH = Cond; + if (Cond.getValueType().isVector()) { + assert(Cond.getValueType().getVectorElementType() == MVT::i1 && + "Condition legalized before result?"); + unsigned NumElements = Cond.getValueType().getVectorNumElements(); + EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2); + CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, + DAG.getIntPtrConstant(0)); + CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, + DAG.getIntPtrConstant(NumElements / 2)); + } + + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH); +} + +void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(2), LL, LH); + GetSplitOp(N->getOperand(3), RL, RH); + + Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0), + N->getOperand(1), LL, RL, N->getOperand(4)); + Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0), + N->getOperand(1), LH, RH, N->getOperand(4)); +} + +void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getUNDEF(LoVT); + Hi = DAG.getUNDEF(HiVT); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp new file mode 100644 index 0000000..c6e066e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -0,0 +1,770 @@ +//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeVectors method. +// +// The vector legalizer looks for vector operations which might need to be +// scalarized and legalizes them. This is a separate step from Legalize because +// scalarizing can introduce illegal types. For example, suppose we have an +// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition +// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the +// operation, which introduces nodes with the illegal type i64 which must be +// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; +// the operation must be unrolled, which introduces nodes with the illegal +// type i8 which must be promoted. +// +// This does not legalize vector manipulations like ISD::BUILD_VECTOR, +// or operations that happen to take a vector which are custom-lowered; +// the legalization for such operations never produces nodes +// with illegal types, so it's okay to put off legalizing them until +// SelectionDAG::Legalize runs. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+class VectorLegalizer {
+  SelectionDAG& DAG;
+  const TargetLowering &TLI;
+  bool Changed; // Keep track of whether anything changed
+
+  /// LegalizedNodes - For nodes that are of legal width, and that have more
+  /// than one use, this map indicates what regularized operand to use. This
+  /// allows us to avoid legalizing the same thing more than once.
+  SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
+
+  // Adds a node to the translation cache
+  void AddLegalizedOperand(SDValue From, SDValue To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return the node
+    // itself.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+  }
+
+  // Legalizes the given node
+  SDValue LegalizeOp(SDValue Op);
+  // Assuming the node is legal, "legalize" the results
+  SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+  // Implements unrolling a VSETCC.
+  SDValue UnrollVSETCC(SDValue Op);
+  // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+  // SINT_TO_FLOAT and SHR on vectors aren't legal.
+  SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+  // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+  SDValue ExpandSEXTINREG(SDValue Op);
+  // Implement vselect in terms of XOR, AND, OR when blend is not supported
+  // by the target.
+  SDValue ExpandVSELECT(SDValue Op);
+  SDValue ExpandSELECT(SDValue Op);
+  SDValue ExpandLoad(SDValue Op);
+  SDValue ExpandStore(SDValue Op);
+  // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+  // isn't legal.
+  SDValue ExpandFNEG(SDValue Op);
+  // Implements vector promotion; this is essentially just bitcasting the
+  // operands to a different type and bitcasting the result back to the
+  // original type.
+  SDValue PromoteVectorOp(SDValue Op);
+  // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
+  // operand to the next size up.
+  SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
+
+  public:
+  bool Run();
+  VectorLegalizer(SelectionDAG& dag) :
+      DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+  // Before we start legalizing vector nodes, check if there are any vectors.
+  bool HasVectors = false;
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) {
+    // Check if the values of the nodes contain vectors. We don't need to check
+    // the operands because we are going to check their values at some point.
+    for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
+         J != E; ++J)
+      HasVectors |= J->isVector();
+
+    // If we found a vector node we can start the legalization.
+    if (HasVectors)
+      break;
+  }
+
+  // If this basic block has no vectors then there is no need to legalize
+  // anything.
+  if (!HasVectors)
+    return false;
+
+  // The legalize process is inherently a bottom-up recursive process (users
+  // legalize their uses before themselves). Given infinite stack space, we
+  // could just start legalizing on the root and traverse the whole graph. In
+  // practice however, this causes us to run out of stack space on large basic
+  // blocks. To avoid this problem, compute an ordering of the nodes where each
+  // node is only legalized after all of its operands are legalized.
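+  // AssignTopologicalOrder renumbers the node list so that every operand
+  // precedes its users. Visiting the nodes in that order means each recursive
+  // LegalizeOp call below finds its operands already cached in
+  // LegalizedNodes, so the recursion stays shallow even on huge blocks.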
+ DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) + LegalizeOp(SDValue(I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); + + return Changed; +} + +SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { + // Generic legalization: just pass the operand through. + for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) + AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); + return Result.getValue(Op.getResNo()); +} + +SDValue VectorLegalizer::LegalizeOp(SDValue Op) { + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDNode* Node = Op.getNode(); + + // Legalize the operands + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + + SDValue Result = + SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); + + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { + if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT())) + return TranslateLegalizeResults(Op, Result); + Changed = true; + return LegalizeOp(ExpandLoad(Op)); + } + } else if (Op.getOpcode() == ISD::STORE) { + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + EVT StVT = ST->getMemoryVT(); + MVT ValVT = ST->getValue().getSimpleValueType(); + if (StVT.isVector() && ST->isTruncatingStore()) + switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + return TranslateLegalizeResults(Op, Result); + case TargetLowering::Custom: + Changed = true; + return LegalizeOp(TLI.LowerOperation(Result, DAG)); + case TargetLowering::Expand: + Changed = true; + return LegalizeOp(ExpandStore(Op)); + } + } + + bool HasVectorValue = false; + for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); + J != E; + ++J) + HasVectorValue |= J->isVector(); + if (!HasVectorValue) + return TranslateLegalizeResults(Op, Result); + + EVT QueryType; + switch (Op.getOpcode()) { + default: + return TranslateLegalizeResults(Op, Result); + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::SELECT: + case ISD::VSELECT: + case ISD::SELECT_CC: + case ISD::SETCC: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FNEG: + case ISD::FABS: + case ISD::FSQRT: + case ISD::FSIN: + 
case ISD::FCOS: + case ISD::FPOWI: + case ISD::FPOW: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FFLOOR: + case ISD::FP_ROUND: + case ISD::FP_EXTEND: + case ISD::FMA: + case ISD::SIGN_EXTEND_INREG: + QueryType = Node->getValueType(0); + break; + case ISD::FP_ROUND_INREG: + QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + QueryType = Node->getOperand(0).getValueType(); + break; + } + + switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { + case TargetLowering::Promote: + switch (Op.getOpcode()) { + default: + // "Promote" the operation by bitcasting + Result = PromoteVectorOp(Op); + Changed = true; + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // "Promote" the operation by extending the operand. + Result = PromoteVectorOpINT_TO_FP(Op); + Changed = true; + break; + } + break; + case TargetLowering::Legal: break; + case TargetLowering::Custom: { + SDValue Tmp1 = TLI.LowerOperation(Op, DAG); + if (Tmp1.getNode()) { + Result = Tmp1; + break; + } + // FALL THROUGH + } + case TargetLowering::Expand: + if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) + Result = ExpandSEXTINREG(Op); + else if (Node->getOpcode() == ISD::VSELECT) + Result = ExpandVSELECT(Op); + else if (Node->getOpcode() == ISD::SELECT) + Result = ExpandSELECT(Op); + else if (Node->getOpcode() == ISD::UINT_TO_FP) + Result = ExpandUINT_TO_FLOAT(Op); + else if (Node->getOpcode() == ISD::FNEG) + Result = ExpandFNEG(Op); + else if (Node->getOpcode() == ISD::SETCC) + Result = UnrollVSETCC(Op); + else + Result = DAG.UnrollVectorOp(Op.getNode()); + break; + } + + // Make sure that the generated code is itself legal. + if (Result != Op) { + Result = LegalizeOp(Result); + Changed = true; + } + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; +} + +SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { + // Vector "promotion" is basically just bitcasting and doing the operation + // in a different type. For example, x86 promotes ISD::AND on v2i32 to + // v1i64. + MVT VT = Op.getSimpleValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); + + return DAG.getNode(ISD::BITCAST, dl, VT, Op); +} + +SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { + // INT_TO_FP operations may require the input operand be promoted even + // when the type is otherwise legal. + EVT VT = Op.getOperand(0).getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + + // Normal getTypeToPromoteTo() doesn't work here, as that will promote + // by widening the vector w/ the same element width and twice the number + // of elements. We want the other way around, the same number of elements, + // each twice the width. 
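+  // For example, a <4 x i8> operand of UINT_TO_FP is promoted to <4 x i16>
+  // with a ZERO_EXTEND (SIGN_EXTEND for SINT_TO_FP), and the conversion is
+  // then redone on the wider input.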
+  //
+  // Increase the bitwidth of the element to the next pow-of-two
+  // (which is greater than 8 bits).
+  unsigned NumElts = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
+  assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
+
+  // Build the new vector type.
+  MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+
+  DebugLoc dl = Op.getDebugLoc();
+  SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+  unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
+                                                     ISD::SIGN_EXTEND;
+  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+    if (Op.getOperand(j).getValueType().isVector())
+      Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
+    else
+      Operands[j] = Op.getOperand(j);
+  }
+
+  return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0],
+                     Operands.size());
+}
+
+
+SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
+  DebugLoc dl = Op.getDebugLoc();
+  LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+  SDValue Chain = LD->getChain();
+  SDValue BasePTR = LD->getBasePtr();
+  EVT SrcVT = LD->getMemoryVT();
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+
+  SmallVector<SDValue, 8> Vals;
+  SmallVector<SDValue, 8> LoadChains;
+  unsigned NumElem = SrcVT.getVectorNumElements();
+
+  EVT SrcEltVT = SrcVT.getScalarType();
+  EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
+
+  if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+    // When the elements of a vector are not byte-addressable, we cannot load
+    // each element simply by advancing the pointer, which can only address
+    // whole bytes. Instead, we load all the significant words, mask bits off,
+    // and concatenate them to form each element, which is finally extended to
+    // the destination scalar type to build the destination vector.
+    EVT WideVT = TLI.getPointerTy();
+
+    assert(WideVT.isRound() &&
+           "Could not handle the sophisticated case when the widest integer"
+           " is not a power of 2.");
+    assert(WideVT.bitsGE(SrcEltVT) &&
+           "Type is not legalized?");
+
+    unsigned WideBytes = WideVT.getStoreSize();
+    unsigned Offset = 0;
+    unsigned RemainingBytes = SrcVT.getStoreSize();
+    SmallVector<SDValue, 8> LoadVals;
+
+    while (RemainingBytes > 0) {
+      SDValue ScalarLoad;
+      unsigned LoadBytes = WideBytes;
+
+      if (RemainingBytes >= LoadBytes) {
+        ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
+                                 LD->getPointerInfo().getWithOffset(Offset),
+                                 LD->isVolatile(), LD->isNonTemporal(),
+                                 LD->isInvariant(), LD->getAlignment());
+      } else {
+        EVT LoadVT = WideVT;
+        while (RemainingBytes < LoadBytes) {
+          LoadBytes >>= 1; // Reduce the load size by half.
+          LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
+        }
+        ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
+                                    LD->getPointerInfo().getWithOffset(Offset),
+                                    LoadVT, LD->isVolatile(),
+                                    LD->isNonTemporal(), LD->getAlignment());
+      }
+
+      RemainingBytes -= LoadBytes;
+      Offset += LoadBytes;
+      BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+                            DAG.getIntPtrConstant(LoadBytes));
+
+      LoadVals.push_back(ScalarLoad.getValue(0));
+      LoadChains.push_back(ScalarLoad.getValue(1));
+    }
+
+    // Extract bits, pack and extend/trunc them into destination type.
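+    // For example, an <8 x i4> source with a 32-bit WideVT is loaded as a
+    // single 32-bit word: element Idx occupies bits [4*Idx, 4*Idx+4), so it
+    // is recovered below with an SRL by 4*Idx and an AND with the mask 0xF
+    // before being extended to the destination element type.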
+    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+    SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT);
+
+    unsigned BitOffset = 0;
+    unsigned WideIdx = 0;
+    unsigned WideBits = WideVT.getSizeInBits();
+
+    for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
+      SDValue Lo, Hi, ShAmt;
+
+      if (BitOffset < WideBits) {
+        ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT));
+        Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+        Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
+      }
+
+      BitOffset += SrcEltBits;
+      if (BitOffset >= WideBits) {
+        WideIdx++;
+        BitOffset -= WideBits;
+        if (BitOffset > 0) {
+          // The element straddles a word boundary; its remaining BitOffset
+          // high bits are the low bits of the next loaded word.
+          ShAmt = DAG.getConstant(SrcEltBits - BitOffset,
+                                  TLI.getShiftAmountTy(WideVT));
+          Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+          Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
+        }
+      }
+
+      if (Hi.getNode())
+        Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
+
+      switch (ExtType) {
+      default: llvm_unreachable("Unknown extended-load op!");
+      case ISD::EXTLOAD:
+        Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
+        break;
+      case ISD::ZEXTLOAD:
+        Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
+        break;
+      case ISD::SEXTLOAD:
+        ShAmt = DAG.getConstant(WideBits - SrcEltBits,
+                                TLI.getShiftAmountTy(WideVT));
+        Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
+        Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
+        Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
+        break;
+      }
+      Vals.push_back(Lo);
+    }
+  } else {
+    unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+    for (unsigned Idx=0; Idx<NumElem; Idx++) {
+      SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
+                Op.getNode()->getValueType(0).getScalarType(),
+                Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
+                SrcVT.getScalarType(),
+                LD->isVolatile(), LD->isNonTemporal(),
+                LD->getAlignment());
+
+      BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+                            DAG.getIntPtrConstant(Stride));
+
+      Vals.push_back(ScalarLoad.getValue(0));
+      LoadChains.push_back(ScalarLoad.getValue(1));
+    }
+  }
+
+  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                                 &LoadChains[0], LoadChains.size());
+  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                              Op.getNode()->getValueType(0),
+                              &Vals[0], Vals.size());
+
+  AddLegalizedOperand(Op.getValue(0), Value);
+  AddLegalizedOperand(Op.getValue(1), NewChain);
+
+  return (Op.getResNo() ? NewChain : Value);
+}
+
+SDValue VectorLegalizer::ExpandStore(SDValue Op) {
+  DebugLoc dl = Op.getDebugLoc();
+  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+  SDValue Chain = ST->getChain();
+  SDValue BasePTR = ST->getBasePtr();
+  SDValue Value = ST->getValue();
+  EVT StVT = ST->getMemoryVT();
+
+  unsigned Alignment = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  bool isNonTemporal = ST->isNonTemporal();
+
+  unsigned NumElem = StVT.getVectorNumElements();
+  // The type of the data we want to save
+  EVT RegVT = Value.getValueType();
+  EVT RegSclVT = RegVT.getScalarType();
+  // The type of data as saved in memory.
+  EVT MemSclVT = StVT.getScalarType();
+
+  // The width in bits that each scalar occupies in memory.
+  unsigned ScalarSize = MemSclVT.getSizeInBits();
+
+  // Round odd types to the next pow of two.
+  if (!isPowerOf2_32(ScalarSize))
+    ScalarSize = NextPowerOf2(ScalarSize);
+
+  // The store stride in bytes.
+  unsigned Stride = ScalarSize/8;
+  // Extract each of the elements from the original vector
+  // and save them into memory individually.
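+  // For example, a truncating store of <4 x i32> with memory type <4 x i8>
+  // becomes four scalar i8 stores at BasePTR+0 through BasePTR+3
+  // (Stride == 1), chained together by the TokenFactor below.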
+  SmallVector<SDValue, 8> Stores;
+  for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+    SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+               RegSclVT, Value, DAG.getIntPtrConstant(Idx));
+
+    // This scalar TruncStore may be illegal, but we legalize it later.
+    SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
+               ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
+               isVolatile, isNonTemporal, Alignment);
+
+    BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+                          DAG.getIntPtrConstant(Stride));
+
+    Stores.push_back(Store);
+  }
+  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                           &Stores[0], Stores.size());
+  AddLegalizedOperand(Op, TF);
+  return TF;
+}
+
+SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+  // Lower a select instruction where the condition is a scalar and the
+  // operands are vectors. Lower this select to VSELECT and implement it
+  // using XOR, AND and OR. The selector bit is broadcast.
+  EVT VT = Op.getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+
+  SDValue Mask = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op2 = Op.getOperand(2);
+
+  assert(VT.isVector() && !Mask.getValueType().isVector()
+         && Op1.getValueType() == Op2.getValueType() && "Invalid type");
+
+  unsigned NumElem = VT.getVectorNumElements();
+
+  // If we can't even use the basic vector operations of
+  // AND, OR, XOR, we will have to scalarize the op.
+  // Notice that the operation may be 'promoted', which means that it is
+  // 'bitcasted' to another type which is handled.
+  // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  // Generate a mask operand.
+  EVT MaskTy = TLI.getSetCCResultType(VT);
+  assert(MaskTy.isVector() && "Invalid CC type");
+  assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits()
+         && "Invalid mask size");
+
+  // The size of each element in the vector mask.
+  EVT BitTy = MaskTy.getScalarType();
+
+  Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask,
+          DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
+          DAG.getConstant(0, BitTy));
+
+  // Broadcast the mask so that the entire vector is all ones or all zeros.
+  SmallVector<SDValue, 8> Ops(NumElem, Mask);
+  Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size());
+
+  // Bitcast the operands to be the same type as the mask.
+  // This is needed when we select between FP types because
+  // the mask is a vector of integers.
+  Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
+  Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
+
+  SDValue AllOnes = DAG.getConstant(
+            APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy);
+  SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+
+  Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
+  Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
+  SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
+  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
+SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
+  EVT VT = Op.getValueType();
+
+  // Make sure that the SRA and SHL instructions are available.
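+  // The expansion shifts the value left so that the original sign bit lands
+  // in the top bit of the lane, then shifts right arithmetically to replicate
+  // it. For example, sign-extending from i5 inside an i8 lane: 0x13 (-13 as a
+  // 5-bit value) << 3 gives 0x98, and an arithmetic >> 3 yields 0xF3 == -13.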
+  if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+
+  unsigned BW = VT.getScalarType().getSizeInBits();
+  unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
+  SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT);
+
+  Op = Op.getOperand(0);
+  Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
+  return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
+}
+
+SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
+  // Implement VSELECT in terms of XOR, AND, OR
+  // on platforms which do not support blend natively.
+  EVT VT = Op.getOperand(0).getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+
+  SDValue Mask = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op2 = Op.getOperand(2);
+
+  // If we can't even use the basic vector operations of
+  // AND, OR, XOR, we will have to scalarize the op.
+  // Notice that the operation may be 'promoted', which means that it is
+  // 'bitcasted' to another type which is handled.
+  // This operation also isn't safe with AND, OR, XOR when the boolean
+  // type is 0/1 as we need an all-ones vector constant to mask with.
+  // FIXME: Sign extend 1 to all ones if that's legal on the target.
+  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+      TLI.getBooleanContents(true) !=
+      TargetLowering::ZeroOrNegativeOneBooleanContent)
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits()
+         && "Invalid mask size");
+  // Bitcast the operands to be the same type as the mask.
+  // This is needed when we select between FP types because
+  // the mask is a vector of integers.
+  Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
+  Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
+
+  SDValue AllOnes = DAG.getConstant(
+    APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT);
+  SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+
+  Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
+  Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
+  SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+  EVT VT = Op.getOperand(0).getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+
+  // Make sure that the SINT_TO_FP and SRL instructions are available.
+  if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  EVT SVT = VT.getScalarType();
+  assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
+         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+  unsigned BW = SVT.getSizeInBits();
+  SDValue HalfWord = DAG.getConstant(BW/2, VT);
+
+  // Constants to clear the upper part of the word.
+  // Notice that we can also use SHL+SHR, but using a constant is slightly
+  // faster on x86.
+  uint64_t HWMask = (SVT.getSizeInBits() == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
+  SDValue HalfWordMask = DAG.getConstant(HWMask, VT);
+
+  // Two to the power of half-word-size.
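+  // The result is sint_to_fp(HI) * 2^(BW/2) + sint_to_fp(LO). Both halves
+  // are non-negative when viewed as BW-bit signed values, so the signed
+  // conversions are exact; e.g. for the u32 input 0xFFFFFFFF, HI == LO ==
+  // 0xFFFF and the result is 65535.0 * 65536.0 + 65535.0 == 4294967295.0.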
+  SDValue TWOHW = DAG.getConstantFP(1ULL << (BW/2), Op.getValueType());
+
+  // Shift the upper half down to form HI; clear the upper half to form LO.
+  SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+  SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+  // Convert HI and LO to floats, then scale the HI part back up to its
+  // original place value.
+  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+  fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+  // Add the two halves
+  return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
+
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+    SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                       Zero, Op.getOperand(0));
+  }
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+  EVT VT = Op.getValueType();
+  unsigned NumElems = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+  DebugLoc dl = Op.getDebugLoc();
+  SmallVector<SDValue, 8> Ops(NumElems);
+  for (unsigned i = 0; i < NumElems; ++i) {
+    SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+                                  DAG.getIntPtrConstant(i));
+    SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+                                  DAG.getIntPtrConstant(i));
+    Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+                         LHSElem, RHSElem, CC);
+    Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+                         DAG.getConstant(APInt::getAllOnesValue
+                                         (EltVT.getSizeInBits()), EltVT),
+                         DAG.getConstant(0, EltVT));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
+
+}
+
+bool SelectionDAG::LegalizeVectors() {
+  return VectorLegalizer(*this).Run();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 0000000..5ec8535
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2795 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or
+// v2f32 types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Result Vector Scalarization: <1 x ty> -> ty. +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to scalarize the result of this " + "operator!\n"); + + case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; + case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; + case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; + case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; + case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; + case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; + case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; + case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::ANY_EXTEND: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FRINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::ZERO_EXTEND: + R = ScalarizeVecRes_UnaryOp(N); + break; + + case ISD::ADD: + case ISD::AND: + case ISD::FADD: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::SUB: + case ISD::UDIV: + case ISD::UREM: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + R = ScalarizeVecRes_BinOp(N); + break; + case ISD::FMA: + R = ScalarizeVecRes_TernaryOp(N); + break; + } + + // If R is null, the sub-method took care of registering the result. 
+ if (R.getNode()) + SetScalarizedVector(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + SDValue Op1 = GetScalarizedVector(N->getOperand(1)); + SDValue Op2 = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + Op0.getValueType(), Op0, Op1, Op2); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return GetScalarizedVector(Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + NewVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + // The BUILD_VECTOR operands may be of wider element types and + // we may need to truncate them back to the requested return type. + if (EltVT.isInteger()) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), + Op0, DAG.getValueType(NewVT), + DAG.getValueType(Op0.getValueType()), + N->getOperand(3), + N->getOperand(4), + cast<CvtRndSatSDNode>(N)->getCvtCode()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + N->getValueType(0).getVectorElementType(), + N->getOperand(0), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + NewVT, Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { + // The value to insert may have a wider type than the vector element type, + // so be sure to truncate it to the element type if necessary. + SDValue Op = N->getOperand(1); + EVT EltVT = N->getValueType(0).getVectorElementType(); + if (Op.getValueType() != EltVT) + // FIXME: Can this happen for floating point types? 
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); + return Op; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { + assert(N->isUnindexed() && "Indexed vector load?"); + + SDValue Result = DAG.getLoad(ISD::UNINDEXED, + N->getExtensionType(), + N->getValueType(0).getVectorElementType(), + N->getDebugLoc(), + N->getChain(), N->getBasePtr(), + DAG.getUNDEF(N->getBasePtr().getValueType()), + N->getPointerInfo(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->isNonTemporal(), + N->isInvariant(), N->getOriginalAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { + // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. + EVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType(); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT, + LHS, DAG.getValueType(ExtVT)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { + SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + if (ScalarBool != VecBool) { + EVT CondVT = Cond.getValueType(); + switch (ScalarBool) { + case TargetLowering::UndefinedBooleanContent: + break; + case TargetLowering::ZeroOrOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); + // Vector read from all ones, scalar expects a single 1 so mask. + Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT, + Cond, DAG.getConstant(1, CondVT)); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrOneBooleanContent); + // Vector reads from a one, scalar from all ones so sign extend. 
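+      // For example, an i8 condition holding 0x01 becomes 0xFF once it is
+      // sign-extended from bit 0, which is the all-ones true value the
+      // scalar select expects here.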
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT, + Cond, DAG.getValueType(MVT::i1)); + break; + } + } + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(), + N->getOperand(0), N->getOperand(1), + LHS, GetScalarizedVector(N->getOperand(3)), + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { + assert(N->getValueType(0).isVector() == + N->getOperand(0).getValueType().isVector() && + "Scalar/Vector type mismatch"); + + if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N); + + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + DebugLoc DL = N->getDebugLoc(); + + // Turn it into a scalar SETCC. + return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { + // Figure out if the scalar is the LHS or RHS and return it. + SDValue Arg = N->getOperand(2).getOperand(0); + if (Arg.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); + unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); + return GetScalarizedVector(N->getOperand(Op)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + EVT NVT = N->getValueType(0).getVectorElementType(); + DebugLoc DL = N->getDebugLoc(); + + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, + N->getOperand(2)); + // Vectors may have a different boolean contents to scalars. Promote the + // value appropriately. + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(true)); + return DAG.getNode(ExtendCode, DL, NVT, Res); +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Scalarization <1 x ty> -> ty. 
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (Res.getNode() == 0) {
+    switch (N->getOpcode()) {
+    default:
+#ifndef NDEBUG
+      dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+      N->dump(&DAG);
+      dbgs() << "\n";
+#endif
+      llvm_unreachable("Do not know how to scalarize this operator's operand!");
+    case ISD::BITCAST:
+      Res = ScalarizeVecOp_BITCAST(N);
+      break;
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::SIGN_EXTEND:
+      Res = ScalarizeVecOp_EXTEND(N);
+      break;
+    case ISD::CONCAT_VECTORS:
+      Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+      break;
+    case ISD::EXTRACT_VECTOR_ELT:
+      Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+      break;
+    case ISD::STORE:
+      Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+      break;
+    }
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place. Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
+  SDValue Elt = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+                     N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Extend the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) {
+  assert(N->getValueType(0).getVectorNumElements() == 1 &&
+         "Unexpected vector type!");
+  SDValue Elt = GetScalarizedVector(N->getOperand(0));
+  SmallVector<SDValue, 1> Ops(1);
+  Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                       N->getValueType(0).getScalarType(), Elt);
+  // Revectorize the result so the types line up with what the uses of this
+  // expression expect.
+  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+                     &Ops[0], 1);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one,
+/// so use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+  SmallVector<SDValue, 8> Ops(N->getNumOperands());
+  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+    Ops[i] = GetScalarizedVector(N->getOperand(i));
+  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+                     &Ops[0], Ops.size());
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue Res = GetScalarizedVector(N->getOperand(0));
+  if (Res.getValueType() != N->getValueType(0))
+    Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0),
+                      Res);
+  return Res;
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(N->isUnindexed() && "Indexed store of one-element vector?"); + assert(OpNo == 1 && "Do not know how to scalarize this operand!"); + DebugLoc dl = N->getDebugLoc(); + + if (N->isTruncatingStore()) + return DAG.getTruncStore(N->getChain(), dl, + GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getPointerInfo(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->isNonTemporal(), + N->getAlignment()); + + return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getPointerInfo(), + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment()); +} + + +//===----------------------------------------------------------------------===// +// Result Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorResult - This method is called when the specified result of the +/// specified node is found to need vector splitting. At this point, the node +/// may also have invalid operands or may have other results that need +/// legalization, we just know that (at least) one result needs vector +/// splitting. +void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Split node result: "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Lo, Hi; + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SplitVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to split the result of this " + "operator!\n"); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; + case ISD::VSELECT: + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; + case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; + case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; + case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::LOAD: + SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); + break; + case ISD::SETCC: + SplitVecRes_SETCC(N, Lo, Hi); + break; + case ISD::VECTOR_SHUFFLE: + SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); + break; + + case ISD::ANY_EXTEND: + case ISD::CONVERT_RNDSAT: + case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FRINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case 
ISD::ZERO_EXTEND: + SplitVecRes_UnaryOp(N, Lo, Hi); + break; + + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::FDIV: + case ISD::FPOW: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::UREM: + case ISD::SREM: + case ISD::FREM: + SplitVecRes_BinOp(N, Lo, Hi); + break; + case ISD::FMA: + SplitVecRes_TernaryOp(N, Lo, Hi); + break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetSplitVector(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); +} + +void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Op0Lo, Op0Hi; + GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); + SDValue Op1Lo, Op1Hi; + GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); + SDValue Op2Lo, Op2Hi; + GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), + Op0Lo, Op1Lo, Op2Lo); + Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), + Op0Hi, Op1Hi, Op2Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // We know the result is a vector. The input may be either a vector or a + // scalar value. + EVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + DebugLoc dl = N->getDebugLoc(); + + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeWidenVector: + break; + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + // A scalar to vector conversion, where the scalar needs expansion. + // If the vector is being split in two then we can just convert the + // expanded pieces. + if (LoVT == HiVT) { + GetExpandedOp(InOp, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); + return; + } + break; + case TargetLowering::TypeSplitVector: + // If the input is a vector that needs to be split, convert each split + // piece of the input now. + GetSplitVector(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); + return; + } + + // In the general case, convert the input to an integer and split it by hand. 
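+  // For example, a v4i32 result whose 128-bit input cannot be split directly
+  // is handled by bitcasting the input to i128, splitting that into two i64
+  // halves, and bitcasting each half to v2i32 (with the usual endian swaps).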
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + if (TLI.isBigEndian()) + std::swap(LoIntVT, HiIntVT); + + SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + unsigned LoNumElts = LoVT.getVectorNumElements(); + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); + Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size()); +} + +void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); + DebugLoc dl = N->getDebugLoc(); + unsigned NumSubvectors = N->getNumOperands() / 2; + if (NumSubvectors == 1) { + Lo = N->getOperand(0); + Hi = N->getOperand(1); + return; + } + + EVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); +} + +void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + EVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, + DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements())); +} + +void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); +} + +void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + DebugLoc dl = N->getDebugLoc(); + + EVT LoVT, HiVT; + GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, + DAG.getValueType(LoVT)); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, + DAG.getValueType(HiVT)); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Elt = N->getOperand(1); + SDValue Idx = N->getOperand(2); + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(Vec, Lo, Hi); + + if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { + unsigned IdxVal = CIdx->getZExtValue(); + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + if (IdxVal < LoNumElts) + Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + Lo.getValueType(), Lo, 
Elt, Idx); + else + Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getIntPtrConstant(IdxVal - LoNumElts)); + return; + } + + // Spill the vector to the stack. + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Store the new element. This may be larger than the vector element type, + // so use a truncating store. + SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = + TLI.getDataLayout()->getPrefTypeAlignment(VecType); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, + false, false, 0); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the Hi part from the stack slot. + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); + Hi = DAG.getUNDEF(HiVT); +} + +void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); + EVT LoVT, HiVT; + DebugLoc dl = LD->getDebugLoc(); + GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + EVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getOriginalAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); + + EVT LoMemVT, HiMemVT; + GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, + LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, + isInvariant, Alignment); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, + LD->getPointerInfo().getWithOffset(IncrementSize), + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), Ch); +} + +void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + + EVT LoVT, HiVT; + DebugLoc DL = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // Split the input. + EVT InVT = N->getOperand(0).getValueType(); + SDValue LL, LH, RL, RH; + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + + RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), + DAG.getIntPtrConstant(0)); + RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); +} + +void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Get the dest types - they may not match the input types, e.g. int_to_fp. + EVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. + EVT InVT = N->getOperand(0).getValueType(); + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + GetSplitVector(N->getOperand(0), Lo, Hi); + } else { + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + } + + if (N->getOpcode() == ISD::FP_ROUND) { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) { + SDValue DTyOpLo = DAG.getValueType(LoVT); + SDValue DTyOpHi = DAG.getValueType(HiVT); + SDValue STyOpLo = DAG.getValueType(Lo.getValueType()); + SDValue STyOpHi = DAG.getValueType(Hi.getValueType()); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); + } else { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + } +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, + SDValue &Lo, SDValue &Hi) { + // The low and high parts of the original input give four input vectors. + SDValue Inputs[4]; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); + GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); + EVT NewVT = Inputs[0].getValueType(); + unsigned NewElts = NewVT.getVectorNumElements(); + + // If Lo or Hi uses elements from at most two of the four input vectors, then + // express it as a vector shuffle of those two inputs. 
Otherwise extract the + // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. + SmallVector<int, 16> Ops; + for (unsigned High = 0; High < 2; ++High) { + SDValue &Output = High ? Hi : Lo; + + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. + unsigned FirstMaskIdx = High * NewElts; + bool useBuildVector = false; + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element does not index into any input vector. + Ops.push_back(-1); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) { + // This input vector is already an operand. + break; + } else if (InputUsed[OpNo] == -1U) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } + + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + useBuildVector = true; + break; + } + + // Add the mask index for the new shuffle vector. + Ops.push_back(Idx + OpNo * NewElts); + } + + if (useBuildVector) { + EVT EltVT = NewVT.getVectorElementType(); + SmallVector<SDValue, 16> SVOps; + + // Extract the input elements by hand. + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element is "undef" or indexes off the end of the input. + SVOps.push_back(DAG.getUNDEF(EltVT)); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Extract the vector element by hand. + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], DAG.getIntPtrConstant(Idx))); + } + + // Construct the Lo/Hi output using a BUILD_VECTOR. + Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); + } else if (InputUsed[0] == -1U) { + // No input vectors were used! The result is undefined. + Output = DAG.getUNDEF(NewVT); + } else { + SDValue Op0 = Inputs[InputUsed[0]]; + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = InputUsed[1] == -1U ? + DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; + // At least one input vector was used. Create a new shuffle vector. 
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + } + + Ops.clear(); + } +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorOperand - This method is called when the specified operand of the +/// specified node is found to need vector splitting. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need legalization as well as the specified one. +bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Split node operand: "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to split this operator's " + "operand!\n"); + + case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; + case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; + case ISD::STORE: + Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; + case ISD::VSELECT: + Res = SplitVecOp_VSELECT(N, OpNo); + break; + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::FTRUNC: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = SplitVecOp_UnaryOp(N); + break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { + // The only possibility for an illegal operand is the mask, since result type + // legalization would have handled this node already otherwise. 
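+  // Illustrative example (assuming v8i32 operands are legal but the v8i16
+  // mask is not): the mask is split into two v4i16 halves, the v8i32 sources
+  // are split with EXTRACT_SUBVECTOR at indices 0 and 4, each half is
+  // VSELECTed separately, and the two v4i32 results are concatenated back
+  // into a v8i32.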
+  assert(OpNo == 0 && "Illegal operand must be mask");
+
+  SDValue Mask = N->getOperand(0);
+  SDValue Src0 = N->getOperand(1);
+  SDValue Src1 = N->getOperand(2);
+  DebugLoc DL = N->getDebugLoc();
+  EVT MaskVT = Mask.getValueType();
+  assert(MaskVT.isVector() && "VSELECT without a vector mask?");
+
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  assert(Lo.getValueType() == Hi.getValueType() &&
+         "Lo and Hi have differing types");
+
+  unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+  unsigned HiNumElts = Hi.getValueType().getVectorNumElements();
+  assert(LoNumElts == HiNumElts && "Asymmetric vector split?");
+
+  LLVMContext &Ctx = *DAG.getContext();
+  SDValue Zero = DAG.getIntPtrConstant(0);
+  SDValue LoElts = DAG.getIntPtrConstant(LoNumElts);
+  EVT Src0VT = Src0.getValueType();
+  EVT Src0EltTy = Src0VT.getVectorElementType();
+  EVT MaskEltTy = MaskVT.getVectorElementType();
+
+  EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts);
+  EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts);
+  EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts);
+  EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts);
+
+  SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero);
+  SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero);
+
+  SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts);
+  SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts);
+
+  SDValue LoMask =
+    DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero);
+  SDValue HiMask =
+    DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts);
+
+  SDValue LoSelect =
+    DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
+  SDValue HiSelect =
+    DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+  // The result has a legal vector type, but the input needs splitting.
+  EVT ResVT = N->getValueType(0);
+  SDValue Lo, Hi;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  EVT InVT = Lo.getValueType();
+
+  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+                               InVT.getVectorNumElements());
+
+  Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+  Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+  // For example, i64 = BITCAST v4i16 on alpha.  Typically the vector will
+  // end up being split all the way down to individual components.  Convert the
+  // split pieces into integers and reassemble.
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  Lo = BitConvertToInteger(Lo);
+  Hi = BitConvertToInteger(Hi);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+                     JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+  // We know that the extracted result type is legal.
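+  // For example, extracting a v2i32 subvector at index 6 from a v8i32 that
+  // was split into two v4i32 halves lands entirely in Hi, so it becomes an
+  // EXTRACT_SUBVECTOR of Hi at index 6 - 4 = 2. (Illustrative, assuming an
+  // even 4/4 split.)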
+  EVT SubVT = N->getValueType(0);
+  SDValue Idx = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+
+  uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+  if (IdxVal < LoElts) {
+    assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+           "Extracted subvector crosses vector split!");
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+  } else {
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+                       DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+  }
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue Vec = N->getOperand(0);
+  SDValue Idx = N->getOperand(1);
+  EVT VecVT = Vec.getValueType();
+
+  if (isa<ConstantSDNode>(Idx)) {
+    uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+    assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+    SDValue Lo, Hi;
+    GetSplitVector(Vec, Lo, Hi);
+
+    uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+    if (IdxVal < LoElts)
+      return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+    return SDValue(DAG.UpdateNodeOperands(N, Hi,
+                                  DAG.getConstant(IdxVal - LoElts,
+                                                  Idx.getValueType())), 0);
+  }
+
+  // Store the vector to the stack.
+  EVT EltVT = VecVT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+  SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
+
+  // Load back the required element.
+  StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+                        MachinePointerInfo(), EltVT, false, false, 0);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  assert(N->isUnindexed() && "Indexed store of vector?");
+  assert(OpNo == 1 && "Can only split the stored value");
+  DebugLoc DL = N->getDebugLoc();
+
+  bool isTruncating = N->isTruncatingStore();
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  EVT MemoryVT = N->getMemoryVT();
+  unsigned Alignment = N->getOriginalAlignment();
+  bool isVol = N->isVolatile();
+  bool isNT = N->isNonTemporal();
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(1), Lo, Hi);
+
+  EVT LoMemVT, HiMemVT;
+  GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+  if (isTruncating)
+    Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+                           LoMemVT, isVol, isNT, Alignment);
+  else
+    Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+                      isVol, isNT, Alignment);
+
+  // Increment the pointer to the other half.
+  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+
+  if (isTruncating)
+    Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+                           N->getPointerInfo().getWithOffset(IncrementSize),
+                           HiMemVT, isVol, isNT, Alignment);
+  else
+    Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+                      N->getPointerInfo().getWithOffset(IncrementSize),
+                      isVol, isNT, Alignment);
+
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+  DebugLoc DL = N->getDebugLoc();
+
+  // The input operands all must have the same type, and we know the result
+  // type is valid.  Convert this to a buildvector which extracts all the
+  // input elements.
+ // TODO: If the input elements are power-two vectors, we could convert this to + // a new CONCAT_VECTORS node with elements that are half-wide. + SmallVector<SDValue, 32> Elts; + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { + SDValue Op = N->getOperand(op); + for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); + i != e; ++i) { + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, + Op, DAG.getIntPtrConstant(i))); + + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), + &Elts[0], Elts.size()); +} + +SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + // The result has a legal vector type, but the input needs splitting. + SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; + DebugLoc DL = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo0, Hi0); + GetSplitVector(N->getOperand(1), Lo1, Hi1); + unsigned PartElements = Lo0.getValueType().getVectorNumElements(); + EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements); + EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements); + + LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); + HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); + SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); + return PromoteTargetBoolean(Con, N->getValueType(0)); +} + + +SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + DebugLoc DL = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); +} + + + +//===----------------------------------------------------------------------===// +// Result Vector Widening +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Widen node result " << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + + // See if the target wants to custom widen this node. 
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo))) + return; + + SDValue Res = SDValue(); + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to widen the result of this operator!"); + + case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; + case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::VSELECT: + case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; + case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; + case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); + break; + case ISD::ADD: + case ISD::AND: + case ISD::BSWAP: + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::UDIV: + case ISD::UREM: + case ISD::SUB: + case ISD::XOR: + Res = WidenVecRes_Binary(N); + break; + + case ISD::FPOWI: + Res = WidenVecRes_POWI(N); + break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + Res = WidenVecRes_Shift(N); + break; + + case ISD::ANY_EXTEND: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::ZERO_EXTEND: + Res = WidenVecRes_Convert(N); + break; + + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + Res = WidenVecRes_Unary(N); + break; + case ISD::FMA: + Res = WidenVecRes_Ternary(N); + break; + } + + // If Res is null, the sub-method took care of registering the result. + if (Res.getNode()) + SetWidenedVector(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { + // Ternary op widening. + DebugLoc dl = N->getDebugLoc(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = GetWidenedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { + // Binary op widening. 
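+  // Sketch of the strategy below (illustrative types): widening a v3i32
+  // sdiv to v4i32 cannot simply operate on garbage in the extra lane,
+  // because integer division may trap. So if the largest legal type that
+  // divides the work is v2i32, the v3i32 sdiv is emitted as one v2i32 sdiv
+  // plus one scalar i32 sdiv, and the pieces are concatenated (padded with
+  // undef) back up to v4i32.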
+ unsigned Opcode = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + while (!TLI.isTypeLegal(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } + + if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { + // Operation doesn't trap so just widen as normal. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + } + + // No legal vector version so unroll the vector operation and then widen. + if (NumElts == 1) + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + + // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + unsigned ConcatEnd = 0; // Current ConcatOps index. + int Idx = 0; // Current Idx into input vectors. + + // NumElts := greatest legal vector size (at most WidenVT) + // while (orig. vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getIntPtrConstant(Idx)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + Idx += NumElts; + CurNumElts -= NumElts; + } + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); + + if (NumElts == 1) { + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp1, DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp2, DAG.getIntPtrConstant(Idx)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, + EOp1, EOp2); + } + CurNumElts = 0; + } + } + + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + Idx = ConcatEnd - 1; + VT = ConcatOps[Idx--].getValueType(); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? 
VT.getVectorNumElements() : 1;
+    EVT NextVT;
+    do {
+      NextSize *= 2;
+      NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+    } while (!TLI.isTypeLegal(NextVT));
+
+    if (!VT.isVector()) {
+      // Scalar type, create an INSERT_VECTOR_ELT of type NextVT
+      SDValue VecOp = DAG.getUNDEF(NextVT);
+      unsigned NumToInsert = ConcatEnd - Idx - 1;
+      for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+        VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+                            ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+      }
+      ConcatOps[Idx+1] = VecOp;
+      ConcatEnd = Idx + 2;
+    } else {
+      // Vector type, create a CONCAT_VECTORS of type NextVT
+      SDValue undefVec = DAG.getUNDEF(VT);
+      unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+      SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+      unsigned RealVals = ConcatEnd - Idx - 1;
+      unsigned SubConcatEnd = 0;
+      unsigned SubConcatIdx = Idx + 1;
+      while (SubConcatEnd < RealVals)
+        SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+      while (SubConcatEnd < OpsToConcat)
+        SubConcatOps[SubConcatEnd++] = undefVec;
+      ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+                                            NextVT, &SubConcatOps[0],
+                                            OpsToConcat);
+      ConcatEnd = SubConcatIdx + 1;
+    }
+  }
+
+  // Check to see if we have a single operation with the widen type.
+  if (ConcatEnd == 1) {
+    VT = ConcatOps[0].getValueType();
+    if (VT == WidenVT)
+      return ConcatOps[0];
+  }
+
+  // Add undefs of size MaxVT until ConcatOps grows to the length of WidenVT.
+  unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+  if (NumOps != ConcatEnd) {
+    SDValue UndefVal = DAG.getUNDEF(MaxVT);
+    for (unsigned j = ConcatEnd; j < NumOps; ++j)
+      ConcatOps[j] = UndefVal;
+  }
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  EVT InVT = InOp.getValueType();
+  EVT InEltVT = InVT.getVectorElementType();
+  EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+  unsigned Opcode = N->getOpcode();
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+
+  if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+    InOp = GetWidenedVector(N->getOperand(0));
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts) {
+      if (N->getNumOperands() == 1)
+        return DAG.getNode(Opcode, DL, WidenVT, InOp);
+      return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+    }
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type that might lead to repeatedly splitting the input
+    // and then widening it. To avoid this, we widen the input only if
+    // it results in a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Widen the input and call convert on the widened input vector.
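+      // E.g. (illustrative): converting v2i32 -> v2f32 where the result
+      // widens to v4f32: concatenate the v2i32 input with a v2i32 undef to
+      // form v4i32, then emit a single v4i32 -> v4f32 convert.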
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      for (unsigned i = 1; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+                                  &Ops[0], NumConcat);
+      if (N->getNumOperands() == 1)
+        return DAG.getNode(Opcode, DL, WidenVT, InVec);
+      return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+                                  InOp, DAG.getIntPtrConstant(0));
+      // Extract the input and convert the shortened input vector.
+      if (N->getNumOperands() == 1)
+        return DAG.getNode(Opcode, DL, WidenVT, InVal);
+      return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = WidenVT.getVectorElementType();
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i=0; i < MinElts; ++i) {
+    SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+                              DAG.getIntPtrConstant(i));
+    if (N->getNumOperands() == 1)
+      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+    else
+      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+  }
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  SDValue ShOp = N->getOperand(1);
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  SDValue ShOp = N->getOperand(1);
+
+  EVT ShVT = ShOp.getValueType();
+  if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
+    ShOp = GetWidenedVector(ShOp);
+    ShVT = ShOp.getValueType();
+  }
+  EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+                                   ShVT.getVectorElementType(),
+                                   WidenVT.getVectorNumElements());
+  if (ShVT != ShWidenVT)
+    ShOp = ModifyToType(ShOp, ShWidenVT);
+
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+  // Unary op widening.
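+  // Unlike the binary case above, no trap check is done here: the unary ops
+  // routed to this path are assumed safe to apply to the whole widened
+  // vector, with the extra lanes producing undef results nobody reads.
+  // (Illustrative: FNEG on v3f32 widened to v4f32 is just a v4f32 FNEG.)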
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               cast<VTSDNode>(N->getOperand(1))->getVT()
+                                 .getVectorElementType(),
+                               WidenVT.getVectorNumElements());
+  SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
+  SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
+  return GetWidenedVector(WidenVec);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  EVT InVT = InOp.getValueType();
+  EVT VT = N->getValueType(0);
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  DebugLoc dl = N->getDebugLoc();
+
+  switch (getTypeAction(InVT)) {
+  case TargetLowering::TypeLegal:
+    break;
+  case TargetLowering::TypePromoteInteger:
+    // If the incoming type is a vector that is being promoted, then
+    // we know that the elements are arranged differently and that we
+    // must perform the conversion using a stack slot.
+    if (InVT.isVector())
+      break;
+
+    // If the InOp is promoted to the same size, convert it.  Otherwise,
+    // fall out of the switch and widen the promoted input.
+    InOp = GetPromotedInteger(InOp);
+    InVT = InOp.getValueType();
+    if (WidenVT.bitsEq(InVT))
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+    break;
+  case TargetLowering::TypeSoftenFloat:
+  case TargetLowering::TypeExpandInteger:
+  case TargetLowering::TypeExpandFloat:
+  case TargetLowering::TypeScalarizeVector:
+  case TargetLowering::TypeSplitVector:
+    break;
+  case TargetLowering::TypeWidenVector:
+    // If the InOp is widened to the same size, convert it.  Otherwise, fall
+    // out of the switch and widen the widened input.
+    InOp = GetWidenedVector(InOp);
+    InVT = InOp.getValueType();
+    if (WidenVT.bitsEq(InVT))
+      // The input widens to the same size. Convert to the widened value.
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+    break;
+  }
+
+  unsigned WidenSize = WidenVT.getSizeInBits();
+  unsigned InSize = InVT.getSizeInBits();
+  // x86mmx is not an acceptable vector element type, so don't try.
+  if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+    // Determine the new input vector type.  The new input vector type will
+    // use the same element type (if it's a vector) or use the input type as
+    // a vector.  It is the same size as the type to widen to.
+    EVT NewInVT;
+    unsigned NewNumElts = WidenSize / InSize;
+    if (InVT.isVector()) {
+      EVT InEltVT = InVT.getVectorElementType();
+      NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+                                 WidenSize / InEltVT.getSizeInBits());
+    } else {
+      NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+    }
+
+    if (TLI.isTypeLegal(NewInVT)) {
+      // Because the result and the input are different vector types, widening
+      // the result could create a legal type but widening the input might make
+      // it an illegal type that might lead to repeatedly splitting the input
+      // and then widening it. To avoid this, we widen the input only if
+      // it results in a legal type.
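+      // Illustrative case: bitcasting an i64 to a v2f32 that widens to
+      // v4f32 (128 bits) builds a v2i64 BUILD_VECTOR of [InOp, undef] and
+      // bitcasts it to v4f32, rather than going through a stack temporary.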
+      SmallVector<SDValue, 16> Ops(NewNumElts);
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      Ops[0] = InOp;
+      for (unsigned i = 1; i < NewNumElts; ++i)
+        Ops[i] = UndefVal;
+
+      SDValue NewVec;
+      if (InVT.isVector())
+        NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+                             NewInVT, &Ops[0], NewNumElts);
+      else
+        NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                             NewInVT, &Ops[0], NewNumElts);
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+    }
+  }
+
+  return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  // Build a vector with undefined for the new nodes.
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+  NewOps.reserve(WidenNumElts);
+  for (unsigned i = NumElts; i < WidenNumElts; ++i)
+    NewOps.push_back(DAG.getUNDEF(EltVT));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+  EVT InVT = N->getOperand(0).getValueType();
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  unsigned NumInElts = InVT.getVectorNumElements();
+  unsigned NumOperands = N->getNumOperands();
+
+  bool InputWidened = false; // Indicates we need to widen the input.
+  if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
+    if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+      // Add undef vectors to widen to the correct length.
+      unsigned NumConcat = WidenVT.getVectorNumElements() /
+                           InVT.getVectorNumElements();
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      for (unsigned i=0; i < NumOperands; ++i)
+        Ops[i] = N->getOperand(i);
+      for (unsigned i = NumOperands; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+    }
+  } else {
+    InputWidened = true;
+    if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+      // The inputs and the result are widened to the same type.
+      unsigned i;
+      for (i=1; i < NumOperands; ++i)
+        if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+          break;
+
+      if (i == NumOperands)
+        // Everything but the first operand is an UNDEF so just return the
+        // widened first operand.
+        return GetWidenedVector(N->getOperand(0));
+
+      if (NumOperands == 2) {
+        // Replace the concat of two operands with a shuffle.
+        SmallVector<int, 16> MaskOps(WidenNumElts, -1);
+        for (unsigned i = 0; i < NumInElts; ++i) {
+          MaskOps[i] = i;
+          MaskOps[i + NumInElts] = i + WidenNumElts;
+        }
+        return DAG.getVectorShuffle(WidenVT, dl,
+                                    GetWidenedVector(N->getOperand(0)),
+                                    GetWidenedVector(N->getOperand(1)),
+                                    &MaskOps[0]);
+      }
+    }
+  }
+
+  // Fall back to using extracts and a build vector.
+  EVT EltVT = WidenVT.getVectorElementType();
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  unsigned Idx = 0;
+  for (unsigned i=0; i < NumOperands; ++i) {
+    SDValue InOp = N->getOperand(i);
+    if (InputWidened)
+      InOp = GetWidenedVector(InOp);
+    for (unsigned j=0; j < NumInElts; ++j)
+      Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                               DAG.getIntPtrConstant(j));
+  }
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; Idx < WidenNumElts; ++Idx)
+    Ops[Idx] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  SDValue InOp = N->getOperand(0);
+  SDValue RndOp = N->getOperand(3);
+  SDValue SatOp = N->getOperand(4);
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  EVT InVT = InOp.getValueType();
+  EVT InEltVT = InVT.getVectorElementType();
+  EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+  SDValue DTyOp = DAG.getValueType(WidenVT);
+  SDValue STyOp = DAG.getValueType(InWidenVT);
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+  if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+    InOp = GetWidenedVector(InOp);
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts)
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type that might lead to repeatedly splitting the input
+    // and then widening it. To avoid this, we widen the input only if
+    // it results in a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Widen the input and call convert on the widened input vector.
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      for (unsigned i = 1; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+
+      InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0], NumConcat);
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the input and convert the shortened input vector.
+      InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+                         DAG.getIntPtrConstant(0));
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = WidenVT.getVectorElementType();
+  DTyOp = DAG.getValueType(EltVT);
+  STyOp = DAG.getValueType(InEltVT);
+
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i=0; i < MinElts; ++i) {
+    SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+                                 DAG.getIntPtrConstant(i));
+    Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+  }
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  SDValue InOp = N->getOperand(0);
+  SDValue Idx = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+
+  if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+    InOp = GetWidenedVector(InOp);
+
+  EVT InVT = InOp.getValueType();
+
+  // Check if we can just return the input vector after widening.
+  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  if (IdxVal == 0 && InVT == WidenVT)
+    return InOp;
+
+  // Check if we can extract from the vector.
+  unsigned InNumElts = InVT.getVectorNumElements();
+  if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+
+  // We could try widening the input to the right length but for now, extract
+  // the original elements, fill the rest with undefs and build a vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned i;
+  for (i=0; i < NumElts; ++i)
+    Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                         DAG.getIntPtrConstant(IdxVal+i));
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+                     InOp.getValueType(), InOp,
+                     N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+
+  SDValue Result;
+  SmallVector<SDValue, 16> LdChain;  // Chain for the series of loads.
+  if (ExtType != ISD::NON_EXTLOAD)
+    Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+  else
+    Result = GenWidenVectorLoads(LdChain, LD);
+
+  // If we generate a single load, we can use that for the chain.  Otherwise,
+  // build a factor node to remember the multiple loads are independent and
+  // chain to that.
+  SDValue NewChain;
+  if (LdChain.size() == 1)
+    NewChain = LdChain[0];
+  else
+    NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+                           &LdChain[0], LdChain.size());
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain); + + return Result; +} + +SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(), + WidenVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue Cond1 = N->getOperand(0); + EVT CondVT = Cond1.getValueType(); + if (CondVT.isVector()) { + EVT CondEltVT = CondVT.getVectorElementType(); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), + CondEltVT, WidenNumElts); + if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) + Cond1 = GetWidenedVector(Cond1); + + if (Cond1.getValueType() != CondWidenVT) + Cond1 = ModifyToType(Cond1, CondWidenVT); + } + + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + SDValue InOp2 = GetWidenedVector(N->getOperand(2)); + assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + WidenVT, Cond1, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { + SDValue InOp1 = GetWidenedVector(N->getOperand(2)); + SDValue InOp2 = GetWidenedVector(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + InOp1.getValueType(), N->getOperand(0), + N->getOperand(1), InOp1, InOp2, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { + assert(N->getValueType(0).isVector() == + N->getOperand(0).getValueType().isVector() && + "Scalar/Vector type mismatch"); + if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT, + InOp1, InOp2, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getUNDEF(WidenVT); +} + +SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned NumElts = VT.getVectorNumElements(); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + + // Adjust mask based on new input vector length. 
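+  // For example (illustrative): a v2i32 shuffle with mask <0,3> whose
+  // operands widen to v4i32 rewrites mask element 3 as 3 - 2 + 4 = 5,
+  // giving a widened mask of <0,5,-1,-1> (the trailing lanes are undef).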
+  SmallVector<int, 16> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int Idx = N->getMaskElt(i);
+    if (Idx < (int)NumElts)
+      NewMask.push_back(Idx);
+    else
+      NewMask.push_back(Idx - NumElts + WidenNumElts);
+  }
+  for (unsigned i = NumElts; i != WidenNumElts; ++i)
+    NewMask.push_back(-1);
+  return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+  assert(N->getValueType(0).isVector() &&
+         N->getOperand(0).getValueType().isVector() &&
+         "Operands must be vectors");
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = N->getOperand(0);
+  EVT InVT = InOp1.getValueType();
+  assert(InVT.isVector() && "Cannot widen a non-vector type");
+  EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+                                   InVT.getVectorElementType(), WidenNumElts);
+  InOp1 = GetWidenedVector(InOp1);
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately.  If not,
+  // we will have to unroll it at some point.
+  assert(InOp1.getValueType() == WidenInVT &&
+         InOp2.getValueType() == WidenInVT &&
+         "Input not widened to expected type!");
+  (void)WidenInVT;
+  return DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+                     WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  // See if the target wants to custom widen this node.
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to widen this operator's operand!");
+
+  case ISD::BITCAST:            Res = WidenVecOp_BITCAST(N); break;
+  case ISD::CONCAT_VECTORS:     Res = WidenVecOp_CONCAT_VECTORS(N); break;
+  case ISD::EXTRACT_SUBVECTOR:  Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
+  case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::STORE:              Res = WidenVecOp_STORE(N); break;
+  case ISD::SETCC:              Res = WidenVecOp_SETCC(N); break;
+
+  case ISD::FP_EXTEND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+    Res = WidenVecOp_Convert(N);
+    break;
+  }
+
+  // If Res is null, the sub-method took care of registering the result.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+  // Since the result is legal and the input is illegal, it is unlikely
+  // that we can fix the input to a legal type so unroll the convert
+  // into some scalar code and create a nasty build vector.
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+  unsigned NumElts = VT.getVectorNumElements();
+  SDValue InOp = N->getOperand(0);
+  if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+    InOp = GetWidenedVector(InOp);
+  EVT InVT = InOp.getValueType();
+  EVT InEltVT = InVT.getVectorElementType();
+
+  unsigned Opcode = N->getOpcode();
+  SmallVector<SDValue, 16> Ops(NumElts);
+  for (unsigned i=0; i < NumElts; ++i)
+    Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+                                     DAG.getIntPtrConstant(i)));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  EVT InWidenVT = InOp.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  // Check if we can convert between two legal vector types and extract.
+  unsigned InWidenSize = InWidenVT.getSizeInBits();
+  unsigned Size = VT.getSizeInBits();
+  // x86mmx is not an acceptable vector element type, so don't try.
+  if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
+    unsigned NewNumElts = InWidenSize / Size;
+    EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+    if (TLI.isTypeLegal(NewVT)) {
+      SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+                         DAG.getIntPtrConstant(0));
+    }
+  }
+
+  return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+  // If the input vector is not legal, it is likely that we will not find a
+  // legal vector of the same size.  Replace the concatenation with a nasty
+  // build vector.
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(NumElts);
+
+  EVT InVT = N->getOperand(0).getValueType();
+  unsigned NumInElts = InVT.getVectorNumElements();
+
+  unsigned Idx = 0;
+  unsigned NumOperands = N->getNumOperands();
+  for (unsigned i=0; i < NumOperands; ++i) {
+    SDValue InOp = N->getOperand(i);
+    if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+      InOp = GetWidenedVector(InOp);
+    for (unsigned j=0; j < NumInElts; ++j)
+      Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                               DAG.getIntPtrConstant(j));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(),
+                     N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+                     N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+  // We have to widen the value, but we only want to store the original
+  // vector type.
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+
+  SmallVector<SDValue, 16> StChain;
+  if (ST->isTruncatingStore())
+    GenWidenVectorTruncStores(StChain, ST);
+  else
+    GenWidenVectorStores(StChain, ST);
+
+  if (StChain.size() == 1)
+    return StChain[0];
+  else
+    return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
+                       MVT::Other, &StChain[0], StChain.size());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+  SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+  DebugLoc dl = N->getDebugLoc();
+
+  // WARNING: In this code we widen the compare instruction with garbage.
+  // This garbage may contain denormal floats which may be slow. Is this a
+  // real concern? Should we zero the unused lanes if this is a float compare?
+
+  // Get a new SETCC node to compare the newly widened operands.
+  // Only some of the compared elements are legal.
+  EVT SVT = TLI.getSetCCResultType(InOp0.getValueType());
+  SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+                                  SVT, InOp0, InOp1, N->getOperand(2));
+
+  // Extract the needed results from the result vector.
+  EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+                               SVT.getVectorElementType(),
+                               N->getValueType(0).getVectorNumElements());
+  SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+                           ResVT, WideSETCC, DAG.getIntPtrConstant(0));
+
+  return PromoteTargetBoolean(CC, N->getValueType(0));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the type to chop up a widened vector for
+// load/store.
+//  TLI:     Target lowering used to determine legal types.
+//  Width:   Width left to load/store.
+//  WidenVT: The widened vector type to load to/store from.
+//  Align:   If 0, don't allow use of a wider type.
+//  WidenEx: If Align is not 0, the additional amount we can load/store from.
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+                       unsigned Width, EVT WidenVT,
+                       unsigned Align = 0, unsigned WidenEx = 0) {
+  EVT WidenEltVT = WidenVT.getVectorElementType();
+  unsigned WidenWidth = WidenVT.getSizeInBits();
+  unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+  unsigned AlignInBits = Align*8;
+
+  // If we have one element to load/store, return it.
+  EVT RetVT = WidenEltVT;
+  if (Width == WidenEltWidth)
+    return RetVT;
+
+  // See if there is a larger legal integer type than the element type to
+  // load/store.
+  unsigned VT;
+  for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+       VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+    EVT MemVT((MVT::SimpleValueType) VT);
+    unsigned MemVTWidth = MemVT.getSizeInBits();
+    if (MemVT.getSizeInBits() <= WidenEltWidth)
+      break;
+    if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+        isPowerOf2_32(WidenWidth / MemVTWidth) &&
+        (MemVTWidth <= Width ||
+         (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+      RetVT = MemVT;
+      break;
+    }
+  }
+
+  // See if there is a larger vector type to load/store that has the same
+  // vector element type and whose width evenly divides the widened width.
+  for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+       VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+    EVT MemVT = (MVT::SimpleValueType) VT;
+    unsigned MemVTWidth = MemVT.getSizeInBits();
+    if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+        (WidenWidth % MemVTWidth) == 0 &&
+        isPowerOf2_32(WidenWidth / MemVTWidth) &&
+        (MemVTWidth <= Width ||
+         (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+      if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+        return MemVT;
+    }
+  }
+
+  return RetVT;
+}
+
+// Builds a vector from scalar loads.
+//  VecTy: Resulting vector type.
+//  LdOps: Load operations to build the vector from.
+//  [Start,End): The list of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+                                     SmallVector<SDValue, 16>& LdOps,
+                                     unsigned Start, unsigned End) {
+  DebugLoc dl = LdOps[Start].getDebugLoc();
+  EVT LdTy = LdOps[Start].getValueType();
+  unsigned Width = VecTy.getSizeInBits();
+  unsigned NumElts = Width / LdTy.getSizeInBits();
+  EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+  unsigned Idx = 1;
+  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOps[Start]);
+
+  for (unsigned i = Start + 1; i != End; ++i) {
+    EVT NewLdTy = LdOps[i].getValueType();
+    if (NewLdTy != LdTy) {
+      NumElts = Width / NewLdTy.getSizeInBits();
+      NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+      VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
+      // Readjust the insertion index based on the new load type.
+      Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+      LdTy = NewLdTy;
+    }
+    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+                        DAG.getIntPtrConstant(Idx++));
+  }
+  return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+                                              LoadSDNode *LD) {
+  // The strategy assumes that we can efficiently load powers-of-two widths.
+  // The routine chops the vector into the largest vector loads with the same
+  // element type, or scalar loads, and then recombines them into the widened
+  // vector type.
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+  unsigned WidenWidth = WidenVT.getSizeInBits();
+  EVT LdVT = LD->getMemoryVT();
+  DebugLoc dl = LD->getDebugLoc();
+  assert(LdVT.isVector() && WidenVT.isVector());
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  unsigned Align = LD->getAlignment();
+  bool isVolatile = LD->isVolatile();
+  bool isNonTemporal = LD->isNonTemporal();
+  bool isInvariant = LD->isInvariant();
+
+  int LdWidth = LdVT.getSizeInBits();
+  int WidthDiff = WidenWidth - LdWidth;          // Difference
+  unsigned LdAlign = (isVolatile) ? 0 : Align;   // Allow wider loads.
+
+  // Find the largest vector type that we can load from.
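+  // Worked example (illustrative, assuming i64, i32 and v2i32 are legal but
+  // v3i32 is not, and the alignment does not permit over-reading a full 128
+  // bits): loading a v3i32 that widens to v4i32 asks FindMemType for 96
+  // bits, which answers i64; the remaining 32 bits are then loaded as an
+  // i32, and the pieces are rebuilt into the widened vector below.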
+  EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+  int NewVTWidth = NewVT.getSizeInBits();
+  SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+                             isVolatile, isNonTemporal, isInvariant, Align);
+  LdChain.push_back(LdOp.getValue(1));
+
+  // Check if we can load the element with one instruction.
+  if (LdWidth <= NewVTWidth) {
+    if (!NewVT.isVector()) {
+      unsigned NumElts = WidenWidth / NewVTWidth;
+      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+      SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+    }
+    if (NewVT == WidenVT)
+      return LdOp;
+
+    assert(WidenWidth % NewVTWidth == 0);
+    unsigned NumConcat = WidenWidth / NewVTWidth;
+    SmallVector<SDValue, 16> ConcatOps(NumConcat);
+    SDValue UndefVal = DAG.getUNDEF(NewVT);
+    ConcatOps[0] = LdOp;
+    for (unsigned i = 1; i != NumConcat; ++i)
+      ConcatOps[i] = UndefVal;
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+                       NumConcat);
+  }
+
+  // Load the vector using multiple loads, from the largest vector type down
+  // to scalars.
+  SmallVector<SDValue, 16> LdOps;
+  LdOps.push_back(LdOp);
+
+  LdWidth -= NewVTWidth;
+  unsigned Offset = 0;
+
+  while (LdWidth > 0) {
+    unsigned Increment = NewVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+
+    SDValue L;
+    if (LdWidth < NewVTWidth) {
+      // The type we were using is too large for the remaining width; find a
+      // smaller one.
+      NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+      NewVTWidth = NewVT.getSizeInBits();
+      L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+                      LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+                      isNonTemporal, isInvariant, MinAlign(Align, Increment));
+      LdChain.push_back(L.getValue(1));
+      if (L->getValueType(0).isVector()) {
+        SmallVector<SDValue, 16> Loads;
+        Loads.push_back(L);
+        unsigned size = L->getValueSizeInBits(0);
+        while (size < LdOp->getValueSizeInBits(0)) {
+          Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+          size += L->getValueSizeInBits(0);
+        }
+        L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0),
+                        &Loads[0], Loads.size());
+      }
+    } else {
+      L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+                      LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+                      isNonTemporal, isInvariant, MinAlign(Align, Increment));
+      LdChain.push_back(L.getValue(1));
+    }
+
+    LdOps.push_back(L);
+
+    LdWidth -= NewVTWidth;
+  }
+
+  // Build the vector from the load operations.
+  unsigned End = LdOps.size();
+  if (!LdOps[0].getValueType().isVector())
+    // All the loads are scalar loads.
+    return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+  // If the loads contain vectors, build the result using CONCAT_VECTORS.
+  // All of the vector loads are powers of 2 in width, and the scalar loads
+  // can be combined to make a power-of-2 vector.
+  // If the loads contain vectors, build the result using CONCAT_VECTORS.
+  // All of the vector loads are a power of 2 in width, and the scalar loads
+  // can be combined to make a power-of-2 vector.
+  SmallVector<SDValue, 16> ConcatOps(End);
+  int i = End - 1;
+  int Idx = End;
+  EVT LdTy = LdOps[i].getValueType();
+  // First combine the scalar loads into a vector.
+  if (!LdTy.isVector()) {
+    for (--i; i >= 0; --i) {
+      LdTy = LdOps[i].getValueType();
+      if (LdTy.isVector())
+        break;
+    }
+    ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+  }
+  ConcatOps[--Idx] = LdOps[i];
+  for (--i; i >= 0; --i) {
+    EVT NewLdTy = LdOps[i].getValueType();
+    if (NewLdTy != LdTy) {
+      // Create a larger vector.
+      ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+                                     &ConcatOps[Idx], End - Idx);
+      Idx = End - 1;
+      LdTy = NewLdTy;
+    }
+    ConcatOps[--Idx] = LdOps[i];
+  }
+
+  if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+                       &ConcatOps[Idx], End - Idx);
+
+  // We need to fill the rest with undefs to build the vector.
+  unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+  SmallVector<SDValue, 16> WidenOps(NumOps);
+  SDValue UndefVal = DAG.getUNDEF(LdTy);
+  {
+    unsigned i = 0;
+    for (; i != End-Idx; ++i)
+      WidenOps[i] = ConcatOps[Idx+i];
+    for (; i != NumOps; ++i)
+      WidenOps[i] = UndefVal;
+  }
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+                                         LoadSDNode * LD,
+                                         ISD::LoadExtType ExtType) {
+  // For extension loads, it may not be more efficient to chop up the vector
+  // and then extend it. Instead, we unroll the load and build a new vector.
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  DebugLoc dl = LD->getDebugLoc();
+  assert(LdVT.isVector() && WidenVT.isVector());
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  unsigned Align = LD->getAlignment();
+  bool isVolatile = LD->isVolatile();
+  bool isNonTemporal = LD->isNonTemporal();
+
+  EVT EltVT = WidenVT.getVectorElementType();
+  EVT LdEltVT = LdVT.getVectorElementType();
+  unsigned NumElts = LdVT.getVectorNumElements();
+
+  // Load each element and widen it.
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  unsigned Increment = LdEltVT.getSizeInBits() / 8;
+  Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
+                          LD->getPointerInfo(),
+                          LdEltVT, isVolatile, isNonTemporal, Align);
+  LdChain.push_back(Ops[0].getValue(1));
+  unsigned i = 0, Offset = Increment;
+  for (i=1; i < NumElts; ++i, Offset += Increment) {
+    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                     BasePtr, DAG.getIntPtrConstant(Offset));
+    Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+                            LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+                            isVolatile, isNonTemporal, Align);
+    LdChain.push_back(Ops[i].getValue(1));
+  }
+
+  // Fill the rest with undefs.
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i != WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+}
+
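GenWidenVectorExtLoads above deliberately avoids clever chopping and scalarizes: one extending load per source element, then UNDEF for the widened tail. A standalone analogue over plain integers (the sign extension and the i8-to-i32 widths are assumptions chosen for illustration; the DAG's UNDEF is modeled as zero):

#include <cstdint>
#include <vector>

// Model of an extending vector load: read numElts narrow elements, extend
// each to the wide element type, and leave the widened tail unspecified
// (zero-filled here where the DAG would use UNDEF).
static std::vector<int32_t> extLoadWiden(const int8_t *mem, unsigned numElts,
                                         unsigned widenNumElts) {
  std::vector<int32_t> out(widenNumElts, 0);
  for (unsigned i = 0; i < numElts; ++i)
    out[i] = static_cast<int32_t>(mem[i]); // sign-extending element load
  return out;
}

int main() {
  const int8_t mem[3] = {-1, 2, -3};                // a v3i8 in memory
  std::vector<int32_t> v = extLoadWiden(mem, 3, 4); // widened to v4i32
  return (v[0] == -1 && v[1] == 2 && v[2] == -3) ? 0 : 1;
}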
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+                                            StoreSDNode *ST) {
+  // The strategy assumes that we can efficiently store powers-of-two widths.
+  // The routine chops the vector into the largest vector stores with the same
+  // element type, or scalar stores.
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  unsigned Align = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  bool isNonTemporal = ST->isNonTemporal();
+  SDValue ValOp = GetWidenedVector(ST->getValue());
+  DebugLoc dl = ST->getDebugLoc();
+
+  EVT StVT = ST->getMemoryVT();
+  unsigned StWidth = StVT.getSizeInBits();
+  EVT ValVT = ValOp.getValueType();
+  unsigned ValWidth = ValVT.getSizeInBits();
+  EVT ValEltVT = ValVT.getVectorElementType();
+  unsigned ValEltWidth = ValEltVT.getSizeInBits();
+  assert(StVT.getVectorElementType() == ValEltVT);
+
+  int Idx = 0;          // current index to store
+  unsigned Offset = 0;  // offset from base to store
+  while (StWidth != 0) {
+    // Find the largest vector type we can store with.
+    EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+    unsigned NewVTWidth = NewVT.getSizeInBits();
+    unsigned Increment = NewVTWidth / 8;
+    if (NewVT.isVector()) {
+      unsigned NumVTElts = NewVT.getVectorNumElements();
+      do {
+        SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+                                  DAG.getIntPtrConstant(Idx));
+        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+                                       ST->getPointerInfo().getWithOffset(Offset),
+                                       isVolatile, isNonTemporal,
+                                       MinAlign(Align, Offset)));
+        StWidth -= NewVTWidth;
+        Offset += Increment;
+        Idx += NumVTElts;
+        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                              DAG.getIntPtrConstant(Increment));
+      } while (StWidth != 0 && StWidth >= NewVTWidth);
+    } else {
+      // Cast the vector to the scalar type we can store.
+      unsigned NumElts = ValWidth / NewVTWidth;
+      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+      SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
+      // Readjust the index position based on the new vector type.
+      Idx = Idx * ValEltWidth / NewVTWidth;
+      do {
+        SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+                                  DAG.getIntPtrConstant(Idx++));
+        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+                                       ST->getPointerInfo().getWithOffset(Offset),
+                                       isVolatile, isNonTemporal,
+                                       MinAlign(Align, Offset)));
+        StWidth -= NewVTWidth;
+        Offset += Increment;
+        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                              DAG.getIntPtrConstant(Increment));
+      } while (StWidth != 0 && StWidth >= NewVTWidth);
+      // Restore the index to be relative to the original widened element type.
+      Idx = Idx * NewVTWidth / ValEltWidth;
+    }
+  }
+}
+
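The two Idx rescalings in the scalar branch of GenWidenVectorStores convert an index between "widened element" units and "store chunk" units around the bitcast; the round trip is lossless as long as the widths divide evenly. A quick standalone check of that arithmetic, with arbitrarily chosen widths:

#include <cassert>

// Convert an element index when reinterpreting the vector with a different
// chunk width, as GenWidenVectorStores does around its BITCAST.
static unsigned rescale(unsigned idx, unsigned fromBits, unsigned toBits) {
  return idx * fromBits / toBits;
}

int main() {
  // Four 16-bit elements already stored: viewed as 32-bit chunks that is
  // index 2, and converting back recovers index 4.
  unsigned idx = rescale(4, 16, 32);
  assert(idx == 2);
  assert(rescale(idx, 32, 16) == 4);
}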
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+                                            StoreSDNode *ST) {
+  // For truncating stores, it may not be more efficient to truncate the
+  // vector and then store it. Instead, we extract each element and then
+  // store it.
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  unsigned Align = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  bool isNonTemporal = ST->isNonTemporal();
+  SDValue ValOp = GetWidenedVector(ST->getValue());
+  DebugLoc dl = ST->getDebugLoc();
+
+  EVT StVT = ST->getMemoryVT();
+  EVT ValVT = ValOp.getValueType();
+
+  // It must be the case that the widened vector type is bigger than the
+  // type we need to store.
+  assert(StVT.isVector() && ValOp.getValueType().isVector());
+  assert(StVT.bitsLT(ValOp.getValueType()));
+
+  // For truncating stores, we cannot play the trick of chopping into legal
+  // vector types and bitcasting to the right type. Instead, we unroll the
+  // store.
+  EVT StEltVT = StVT.getVectorElementType();
+  EVT ValEltVT = ValVT.getVectorElementType();
+  unsigned Increment = ValEltVT.getSizeInBits() / 8;
+  unsigned NumElts = StVT.getVectorNumElements();
+  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                            DAG.getIntPtrConstant(0));
+  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+                                      ST->getPointerInfo(), StEltVT,
+                                      isVolatile, isNonTemporal, Align));
+  unsigned Offset = Increment;
+  for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                     BasePtr, DAG.getIntPtrConstant(Offset));
+    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                              DAG.getIntPtrConstant(i));
+    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+                                        ST->getPointerInfo().getWithOffset(Offset),
+                                        StEltVT, isVolatile, isNonTemporal,
+                                        MinAlign(Align, Offset)));
+  }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+  // Note that InOp might have been widened, so it might already have
+  // the right width or it might need to be narrowed.
+  EVT InVT = InOp.getValueType();
+  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+         "input and widen element type must match");
+  DebugLoc dl = InOp.getDebugLoc();
+
+  // Check if InOp already has the right width.
+  if (InVT == NVT)
+    return InOp;
+
+  unsigned InNumElts = InVT.getVectorNumElements();
+  unsigned WidenNumElts = NVT.getVectorNumElements();
+  if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+    unsigned NumConcat = WidenNumElts / InNumElts;
+    SmallVector<SDValue, 16> Ops(NumConcat);
+    SDValue UndefVal = DAG.getUNDEF(InVT);
+    Ops[0] = InOp;
+    for (unsigned i = 1; i != NumConcat; ++i)
+      Ops[i] = UndefVal;
+
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+  }
+
+  if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+                       DAG.getIntPtrConstant(0));
+
+  // Fall back to extract and build.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = NVT.getVectorElementType();
+  unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+  unsigned Idx;
+  for (Idx = 0; Idx < MinNumElts; ++Idx)
+    Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                           DAG.getIntPtrConstant(Idx));
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for ( ; Idx < WidenNumElts; ++Idx)
+    Ops[Idx] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
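ModifyToType picks among three shapes: CONCAT_VECTORS with UNDEF operands when widening by an exact multiple, EXTRACT_SUBVECTOR at index zero when narrowing, and an element-wise BUILD_VECTOR otherwise. Over plain vectors all three collapse to "copy what fits, pad the rest"; a standalone model of that observable behavior (UNDEF padding modeled as zero):

#include <vector>

// Resize a "vector value" to n elements the way ModifyToType does: widening
// pads the tail (UNDEF in the DAG, zero here) and narrowing truncates; the
// element-wise rebuild produces the same result.
static std::vector<int> modifyToSize(const std::vector<int> &in, unsigned n) {
  std::vector<int> out(n, 0); // 0 stands in for UNDEF padding
  for (unsigned i = 0; i < n && i < in.size(); ++i)
    out[i] = in[i];
  return out;
}

int main() {
  std::vector<int> v{1, 2, 3};
  std::vector<int> wide = modifyToSize(v, 8);   // concat-with-undef shape
  std::vector<int> narrow = modifyToSize(v, 2); // extract-subvector shape
  return (wide[3] == 0 && narrow.size() == 2) ? 0 : 1;
}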
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 0000000..473e138
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,655 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// The DFA is queried as a state machine to model "packets/bundles" during
+// scheduling. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only the order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
+                                     cl::ZeroOrMore, cl::init(false),
+                                     cl::desc("Disable use of DFA during scheduling"));
+
+static cl::opt<signed> RegPressureThreshold(
+  "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
+  cl::desc("Track reg pressure and switch priority to in-depth"));
+
+
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
+  Picker(this),
+  InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData())
+{
+  TII = IS->getTargetLowering().getTargetMachine().getInstrInfo();
+  TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo();
+  TLI = &IS->getTargetLowering();
+
+  const TargetMachine &tm = (*IS->MF).getTarget();
+  ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
+  // This hard requirement could be relaxed, but for now
+  // do not let it proceed.
+  assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+  unsigned NumRC = TRI->getNumRegClasses();
+  RegLimit.resize(NumRC);
+  RegPressure.resize(NumRC);
+  std::fill(RegLimit.begin(), RegLimit.end(), 0);
+  std::fill(RegPressure.begin(), RegPressure.end(), 0);
+  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+       E = TRI->regclass_end(); I != E; ++I)
+    RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+
+  ParallelLiveRanges = 0;
+  HorizontalVerticalBalance = 0;
+}
+
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+  unsigned NumberDeps = 0;
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+
+    SUnit *PredSU = I->getSUnit();
+    const SDNode *ScegN = PredSU->getNode();
+
+    if (!ScegN)
+      continue;
+
+    // A value that reaches us through a CopyFromReg is probably
+    // live into the block from outside.
+    switch (ScegN->getOpcode()) {
+    default: break;
+    case ISD::TokenFactor: break;
+    case ISD::CopyFromReg: NumberDeps++; break;
+    case ISD::CopyToReg: break;
+    case ISD::INLINEASM: break;
+    }
+    if (!ScegN->isMachineOpcode())
+      continue;
+
+    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+      MVT VT = ScegN->getSimpleValueType(i);
+      if (TLI->isTypeLegal(VT)
+          && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+        NumberDeps++;
+        break;
+      }
+    }
+  }
+  return NumberDeps;
+}
+
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+                                                    unsigned RCId) {
+  unsigned NumberDeps = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+
+    SUnit *SuccSU = I->getSUnit();
+    const SDNode *ScegN = SuccSU->getNode();
+    if (!ScegN)
+      continue;
+
+    // If the value is passed to a CopyToReg, it is probably
+    // live outside the BB.
+ switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: break; + case ISD::CopyToReg: NumberDeps++; break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +static unsigned numberCtrlDepsInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +static unsigned numberCtrlPredInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +/// +/// Initialize nodes. +/// +void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + initNumRegDefsLeft(SU); + SU->NodeQueueId = 0; + } +} + +/// This heuristic is used if DFA scheduling is not desired +/// for some VLIW platform. +bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +void ResourcePriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. 
+  unsigned NumNodesBlocking = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+      ++NumNodesBlocking;
+
+  NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+  Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+  if (!SU || !SU->getNode())
+    return false;
+
+  // If this is a compound instruction,
+  // it is likely to be a call. Do not delay it.
+  if (SU->getNode()->getGluedNode())
+    return true;
+
+  // First see if the pipeline could receive this instruction
+  // in the current cycle.
+  if (SU->getNode()->isMachineOpcode())
+    switch (SU->getNode()->getMachineOpcode()) {
+    default:
+      if (!ResourcesModel->canReserveResources(&TII->get(
+          SU->getNode()->getMachineOpcode())))
+        return false;
+    case TargetOpcode::EXTRACT_SUBREG:
+    case TargetOpcode::INSERT_SUBREG:
+    case TargetOpcode::SUBREG_TO_REG:
+    case TargetOpcode::REG_SEQUENCE:
+    case TargetOpcode::IMPLICIT_DEF:
+      break;
+    }
+
+  // Now check that there are no other dependencies
+  // on instructions already in the packet.
+  for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+    for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+         E = Packet[i]->Succs.end(); I != E; ++I) {
+      // Since we do not add pseudos to packets, we might as well
+      // ignore order deps.
+      if (I->isCtrl())
+        continue;
+
+      if (I->getSUnit() == SU)
+        return false;
+    }
+
+  return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+  // If this SU does not fit in the packet,
+  // start a new one.
+  if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+    ResourcesModel->clearResources();
+    Packet.clear();
+  }
+
+  if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+    switch (SU->getNode()->getMachineOpcode()) {
+    default:
+      ResourcesModel->reserveResources(&TII->get(
+        SU->getNode()->getMachineOpcode()));
+      break;
+    case TargetOpcode::EXTRACT_SUBREG:
+    case TargetOpcode::INSERT_SUBREG:
+    case TargetOpcode::SUBREG_TO_REG:
+    case TargetOpcode::REG_SEQUENCE:
+    case TargetOpcode::IMPLICIT_DEF:
+      break;
+    }
+    Packet.push_back(SU);
+  }
+  // Forcefully end the packet for pseudo ops.
+  else {
+    ResourcesModel->clearResources();
+    Packet.clear();
+  }
+
+  // If the packet is now full, reset the state so in the next cycle
+  // we start fresh.
+  if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
+    ResourcesModel->clearResources();
+    Packet.clear();
+  }
+}
+
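reserveResources above is a small VLIW packet builder: an instruction that does not fit (or a glued/pseudo op) flushes the current packet, and a packet reaching IssueWidth also flushes. A standalone toy with the same control flow, where a bare boolean stands in for the DFA's canReserveResources query:

#include <cstdio>
#include <vector>

struct Packetizer {
  unsigned IssueWidth;
  std::vector<int> Packet;

  // Mirrors reserveResources: flush on no-fit, add, flush when full.
  void add(int instr, bool fits) {
    if (!fits)
      Packet.clear();          // forcefully end the packet
    Packet.push_back(instr);
    if (Packet.size() >= IssueWidth)
      Packet.clear();          // packet full: next cycle starts fresh
  }
};

int main() {
  Packetizer p{2, {}};
  p.add(0, true);   // packet: {0}
  p.add(1, true);   // packet fills to width 2 and is flushed
  p.add(2, false);  // would not fit: flush, then it becomes the sole member
  printf("open packet size: %zu\n", p.Packet.size()); // prints 1
}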
+signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+  signed RegBalance = 0;
+
+  if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+    return RegBalance;
+
+  // Gen estimate.
+  for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+    MVT VT = SU->getNode()->getSimpleValueType(i);
+    if (TLI->isTypeLegal(VT)
+        && TLI->getRegClassFor(VT)
+        && TLI->getRegClassFor(VT)->getID() == RCId)
+      RegBalance += numberRCValSuccInSU(SU, RCId);
+  }
+  // Kill estimate.
+  for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+    const SDValue &Op = SU->getNode()->getOperand(i);
+    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+    if (isa<ConstantSDNode>(Op.getNode()))
+      continue;
+
+    if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+        && TLI->getRegClassFor(VT)->getID() == RCId)
+      RegBalance -= numberRCValPredInSU(SU, RCId);
+  }
+  return RegBalance;
+}
+
+/// Estimates the change in reg pressure from scheduling this SU.
+/// It is achieved by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function ignore
+/// existing reg file sizes and report the raw def/use
+/// balance.
+signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+  signed RegBalance = 0;
+
+  if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+    return RegBalance;
+
+  if (RawPressure) {
+    for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+         E = TRI->regclass_end(); I != E; ++I) {
+      const TargetRegisterClass *RC = *I;
+      RegBalance += rawRegPressureDelta(SU, RC->getID());
+    }
+  }
+  else {
+    for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+         E = TRI->regclass_end(); I != E; ++I) {
+      const TargetRegisterClass *RC = *I;
+      if ((RegPressure[RC->getID()] +
+           rawRegPressureDelta(SU, RC->getID()) > 0) &&
+          (RegPressure[RC->getID()] +
+           rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+        RegBalance += rawRegPressureDelta(SU, RC->getID());
+    }
+  }
+
+  return RegBalance;
+}
+
+// Constants used to denote the relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 15;
+static const unsigned PriorityFive = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns a single number reflecting the benefit of scheduling SU
+/// in the current cycle.
+signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+  // Initial trivial priority.
+  signed ResCount = 1;
+
+  // Do not waste time on a node that is already scheduled.
+  if (SU->isScheduled)
+    return ResCount;
+
+  // Forced priority is high.
+  if (SU->isScheduleHigh)
+    ResCount += PriorityOne;
+
+  // Adaptive scheduling for a small but very parallel
+  // region, where reg pressure is an issue.
+  if (HorizontalVerticalBalance > RegPressureThreshold) {
+    // Critical path first.
+    ResCount += (SU->getHeight() * ScaleTwo);
+    // If resources are available for it, multiply the
+    // chance of scheduling.
+    if (isResourceAvailable(SU))
+      ResCount <<= FactorOne;
+
+    // Consider the change to reg pressure from scheduling
+    // this SU.
+    ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+  }
+  // Default heuristic: greedy and
+  // critical-path driven.
+  else {
+    // Critical path first.
+    ResCount += (SU->getHeight() * ScaleTwo);
+    // Now see how many instructions are blocked by this SU.
+    ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+    // If resources are available for it, multiply the
+    // chance of scheduling.
+    if (isResourceAvailable(SU))
+      ResCount <<= FactorOne;
+
+    ResCount -= (regPressureDelta(SU) * ScaleTwo);
+  }
+
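Under the constants defined above, the default branch of this cost function is easy to evaluate by hand: a weighted sum of height and blocked-node count, shifted left by FactorOne (that is, multiplied by 4 with the given constants) when the packet has room, minus a pressure penalty. A standalone rendition on plain inputs (the constant values are copied from the file; the inputs in main are made up):

#include <cstdio>

static const unsigned PriorityOne = 200;
static const unsigned ScaleTwo = 10;
static const unsigned FactorOne = 2;

// The default (greedy, critical-path) branch of SUSchedulingCost, modeled
// on plain inputs instead of an SUnit.
static int cost(unsigned height, unsigned solelyBlocked, bool resourcesFree,
                int pressureDelta, bool scheduleHigh) {
  int c = 1;
  if (scheduleHigh) c += PriorityOne;
  c += height * ScaleTwo;          // critical path first
  c += solelyBlocked * ScaleTwo;   // unblocks other nodes
  if (resourcesFree) c <<= FactorOne;
  c -= pressureDelta * ScaleTwo;   // penalize raising reg pressure
  return c;
}

int main() {
  // height 4, solely blocks 1 node, fits in the packet, pressure-neutral:
  printf("%d\n", cost(4, 1, true, 0, false)); // (1 + 40 + 10) << 2 = 204
}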
+  // These are platform-specific things. They will need to go into the
+  // back end and be accessed from here via a hook.
+  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+    if (N->isMachineOpcode()) {
+      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+      if (TID.isCall())
+        ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+    }
+    else
+      switch (N->getOpcode()) {
+      default: break;
+      case ISD::TokenFactor:
+      case ISD::CopyFromReg:
+      case ISD::CopyToReg:
+        ResCount += PriorityFive;
+        break;
+
+      case ISD::INLINEASM:
+        ResCount += PriorityFour;
+        break;
+      }
+  }
+  return ResCount;
+}
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+  // Use a NULL entry as an event marker to reset
+  // the DFA state.
+  if (!SU) {
+    ResourcesModel->clearResources();
+    Packet.clear();
+    return;
+  }
+
+  const SDNode *ScegN = SU->getNode();
+  // Update reg pressure tracking.
+  // First update the current node.
+  if (ScegN->isMachineOpcode()) {
+    // Estimate generated regs.
+    for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+      MVT VT = ScegN->getSimpleValueType(i);
+
+      if (TLI->isTypeLegal(VT)) {
+        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+        if (RC)
+          RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+      }
+    }
+    // Estimate killed regs.
+    for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+      const SDValue &Op = ScegN->getOperand(i);
+      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+
+      if (TLI->isTypeLegal(VT)) {
+        const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+        if (RC) {
+          if (RegPressure[RC->getID()] >
+              (numberRCValPredInSU(SU, RC->getID())))
+            RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+          else RegPressure[RC->getID()] = 0;
+        }
+      }
+    }
+    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+        continue;
+      --I->getSUnit()->NumRegDefsLeft;
+    }
+  }
+
+  // Reserve resources for this SU.
+  reserveResources(SU);
+
+  // Adjust the number of parallel live ranges.
+  // The heuristic is simple: a node with no data successors reduces
+  // the number of live ranges; all others increase it.
+  unsigned NumberNonControlDeps = 0;
+
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    adjustPriorityOfUnscheduledPreds(I->getSUnit());
+    if (!I->isCtrl())
+      NumberNonControlDeps++;
+  }
+
+  if (!NumberNonControlDeps) {
+    if (ParallelLiveRanges >= SU->NumPreds)
+      ParallelLiveRanges -= SU->NumPreds;
+    else
+      ParallelLiveRanges = 0;
+  }
+  else
+    ParallelLiveRanges += SU->NumRegDefsLeft;
+
+  // Track parallel live chains.
+  HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+  HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+  unsigned NodeNumDefs = 0;
+  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+    if (N->isMachineOpcode()) {
+      const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+      // No register need be allocated for this.
+      if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+        NodeNumDefs = 0;
+        break;
+      }
+      NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
+    }
+    else
+      switch (N->getOpcode()) {
+      default: break;
+      case ISD::CopyFromReg:
+        NodeNumDefs++;
+        break;
+      case ISD::INLINEASM:
+        NodeNumDefs++;
+        break;
+      }
+
+  SU->NumRegDefsLeft = NodeNumDefs;
+}
+
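The ParallelLiveRanges update in scheduledNode is worth seeing in isolation: a node with no data successors only consumes values, so it retires up to NumPreds live ranges (clamped at zero); any other node opens one live range per register def it carries. A standalone sketch of exactly that bookkeeping:

#include <cassert>

// ParallelLiveRanges bookkeeping from scheduledNode, on plain counters.
static void adjustLiveRanges(unsigned &parallelLiveRanges,
                             unsigned dataSuccs, unsigned numPreds,
                             unsigned numRegDefsLeft) {
  if (dataSuccs == 0)
    parallelLiveRanges -= (parallelLiveRanges >= numPreds)
                              ? numPreds
                              : parallelLiveRanges; // clamp at zero
  else
    parallelLiveRanges += numRegDefsLeft;
}

int main() {
  unsigned plr = 3;
  adjustLiveRanges(plr, 2, 0, 1); // defines one value used later
  assert(plr == 4);
  adjustLiveRanges(plr, 0, 6, 0); // pure consumer with 6 preds: clamps to 0
  assert(plr == 0);
}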
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+  if (SU->isAvailable) return;  // All preds scheduled.
+
+  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+  if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable)
+    return;
+
+  // Okay, we found a single predecessor that is available, but not scheduled.
+  // Since it is available, it must be in the priority queue. First remove it.
+  remove(OnlyAvailablePred);
+
+  // Reinsert the node into the priority queue, which recomputes its
+  // NumNodesSolelyBlocking value.
+  push(OnlyAvailablePred);
+}
+
+
+/// Main access point - returns the next instruction
+/// to be placed in the scheduling sequence.
+SUnit *ResourcePriorityQueue::pop() {
+  if (empty())
+    return 0;
+
+  std::vector<SUnit *>::iterator Best = Queue.begin();
+  if (!DisableDFASched) {
+    signed BestCost = SUSchedulingCost(*Best);
+    for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+         E = Queue.end(); I != E; ++I) {
+      signed CurCost = SUSchedulingCost(*I);
+      if (CurCost > BestCost) {
+        BestCost = CurCost;
+        Best = I;
+      }
+    }
+  }
+  // Use the default TD scheduling mechanism.
+  else {
+    for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+         E = Queue.end(); I != E; ++I)
+      if (Picker(*Best, *I))
+        Best = I;
+  }
+
+  SUnit *V = *Best;
+  if (Best != prior(Queue.end()))
+    std::swap(*Best, Queue.back());
+
+  Queue.pop_back();
+
+  return V;
+}
+
+
+void ResourcePriorityQueue::remove(SUnit *SU) {
+  assert(!Queue.empty() && "Queue is empty!");
+  std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+  if (I != prior(Queue.end()))
+    std::swap(*I, Queue.back());
+
+  Queue.pop_back();
+}
+
+
+#ifdef NDEBUG
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
+  ResourcePriorityQueue q = *this;
+  while (!q.empty()) {
+    SUnit *su = q.pop();
+    dbgs() << "Height " << su->getHeight() << ": ";
+    su->dump(DAG);
+  }
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
new file mode 100644
index 0000000..4af7172
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -0,0 +1,114 @@
+//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H
+#define LLVM_CODEGEN_SDNODEDBGVALUE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+ +class SDDbgValue { +public: + enum DbgValueKind { + SDNODE = 0, // value is the result of an expression + CONST = 1, // value is a constant + FRAMEIX = 2 // value is contents of a stack location + }; +private: + enum DbgValueKind kind; + union { + struct { + SDNode *Node; // valid for expressions + unsigned ResNo; // valid for expressions + } s; + const Value *Const; // valid for constants + unsigned FrameIx; // valid for stack objects + } u; + MDNode *mdPtr; + uint64_t Offset; + DebugLoc DL; + unsigned Order; + bool Invalid; +public: + // Constructor for non-constants. + SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl, + unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O), + Invalid(false) { + kind = SDNODE; + u.s.Node = N; + u.s.ResNo = R; + } + + // Constructor for constants. + SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, + unsigned O) : + mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + kind = CONST; + u.Const = C; + } + + // Constructor for frame indices. + SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : + mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + kind = FRAMEIX; + u.FrameIx = FI; + } + + // Returns the kind. + DbgValueKind getKind() { return kind; } + + // Returns the MDNode pointer. + MDNode *getMDPtr() { return mdPtr; } + + // Returns the SDNode* for a register ref + SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; } + + // Returns the ResNo for a register ref + unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; } + + // Returns the Value* for a constant + const Value *getConst() { assert (kind==CONST); return u.Const; } + + // Returns the FrameIx for a stack object + unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } + + // Returns the offset. + uint64_t getOffset() { return Offset; } + + // Returns the DebugLoc. + DebugLoc getDebugLoc() { return DL; } + + // Returns the SDNodeOrder. This is the order of the preceding node in the + // input. + unsigned getOrder() { return Order; } + + // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" + // property. A SDDbgValue is invalid if the SDNode that produces the value is + // deleted. + void setIsInvalidated() { Invalid = true; } + bool isInvalidated() { return Invalid; } +}; + +} // end llvm namespace + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h new file mode 100644 index 0000000..7e7b897 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -0,0 +1,56 @@ +//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SDNodeOrdering class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SDNODEORDERING_H +#define LLVM_CODEGEN_SDNODEORDERING_H + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + +class SDNode; + +/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each +/// SDNode that roughly corresponds to the ordering of the original LLVM +/// instruction. 
This is used for turning off scheduling, because we'll forgo
+/// the normal scheduling algorithms and output the instructions according to
+/// this ordering.
+class SDNodeOrdering {
+  DenseMap<const SDNode*, unsigned> OrderMap;
+
+  void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
+  SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
+public:
+  SDNodeOrdering() {}
+
+  void add(const SDNode *Node, unsigned NewOrder) {
+    unsigned &OldOrder = OrderMap[Node];
+    if (OldOrder == 0 || NewOrder < OldOrder)
+      OldOrder = NewOrder;
+  }
+  void remove(const SDNode *Node) {
+    DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
+    if (Itr != OrderMap.end())
+      OrderMap.erase(Itr);
+  }
+  void clear() {
+    OrderMap.clear();
+  }
+  unsigned getOrder(const SDNode *Node) {
+    return OrderMap[Node];
+  }
+};
+
+} // end llvm namespace
+
+#endif
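SDNodeOrdering::add keeps the smallest nonzero order ever recorded for a node (zero meaning "not seen yet"), so a node that ends up representing several IR instructions sorts by the earliest of them. The same logic over a plain map, as a standalone check:

#include <cassert>
#include <unordered_map>

// Record an ordering like SDNodeOrdering::add: the first write wins unless a
// smaller (earlier) order shows up later; 0 means "no order yet".
static void addOrder(std::unordered_map<int, unsigned> &m, int node,
                     unsigned newOrder) {
  unsigned &old = m[node];
  if (old == 0 || newOrder < old)
    old = newOrder;
}

int main() {
  std::unordered_map<int, unsigned> m;
  addOrder(m, 42, 7);
  addOrder(m, 42, 9); // later, larger order: ignored
  addOrder(m, 42, 3); // earlier order: replaces
  assert(m[42] == 3);
}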
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 0000000..d1f36cb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,799 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "InstrEmitter.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+  fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+                   createFastDAGScheduler);
+static RegisterScheduler
+  linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+                        createDAGLinearizer);
+
+
+namespace {
+  /// FastPriorityQueue - A degenerate priority queue that considers
+  /// all nodes to have the same priority.
+  ///
+  struct FastPriorityQueue {
+    SmallVector<SUnit *, 16> Queue;
+
+    bool empty() const { return Queue.empty(); }
+
+    void push(SUnit *U) {
+      Queue.push_back(U);
+    }
+
+    SUnit *pop() {
+      if (empty()) return NULL;
+      SUnit *V = Queue.back();
+      Queue.pop_back();
+      return V;
+    }
+  };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  FastPriorityQueue AvailableQueue;
+
+  /// LiveRegDefs - A set of physical registers and their definitions
+  /// that are "live". These nodes must be scheduled before any other node
+  /// that modifies the registers can be scheduled.
+  unsigned NumLiveRegs;
+  std::vector<SUnit*> LiveRegDefs;
+  std::vector<unsigned> LiveRegCycles;
+
+public:
+  ScheduleDAGFast(MachineFunction &mf)
+    : ScheduleDAGSDNodes(mf) {}
+
+  void Schedule();
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  void AddPred(SUnit *SU, const SDep &D) {
+    SU->addPred(D);
+  }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  void RemovePred(SUnit *SU, const SDep &D) {
+    SU->removePred(D);
+  }
+
+private:
+  void ReleasePred(SUnit *SU, SDep *PredEdge);
+  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+  void ScheduleNodeBottomUp(SUnit*, unsigned);
+  SUnit *CopyAndMoveSuccessors(SUnit*);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVector<SUnit*, 2>&);
+  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+  void ListScheduleBottomUp();
+
+  /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+  bool forceUnitLatencies() const { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+  NumLiveRegs = 0;
+  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+  LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+  // Build the scheduling graph.
+  BuildSchedGraph(NULL);
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+
+  // Execute the actual scheduling loop.
+  ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+//  Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    PredSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --PredSU->NumSuccsLeft;
+
+  // If all the node's successors are scheduled, this node is ready
+  // to be scheduled. Ignore the special EntrySU node.
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+    PredSU->isAvailable = true;
+    AvailableQueue.push(PredSU);
+  }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+  // Bottom up: release predecessors.
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    ReleasePred(SU, &*I);
+    if (I->isAssignedRegDep()) {
+      // This is a physical register dependency and it's impossible or
+      // expensive to copy the register. Make sure nothing that can
+      // clobber the register is scheduled between the predecessor and
+      // this node.
+      if (!LiveRegDefs[I->getReg()]) {
+        ++NumLiveRegs;
+        LiveRegDefs[I->getReg()] = I->getSUnit();
+        LiveRegCycles[I->getReg()] = CurCycle;
+      }
+    }
+  }
+}
+
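ReleasePred above is the heart of bottom-up list scheduling: every node counts its unscheduled successors, scheduling a node decrements each predecessor's counter, and a counter reaching zero makes that predecessor available. The mechanism in miniature, as a standalone sketch:

#include <cassert>
#include <vector>

// A node counts unscheduled successors; releasing the last one makes it
// available, exactly like ReleasePred/NumSuccsLeft.
struct MiniSU { unsigned NumSuccsLeft; bool Available; };

static void release(std::vector<MiniSU> &sus,
                    const std::vector<unsigned> &preds) {
  for (unsigned p : preds)
    if (--sus[p].NumSuccsLeft == 0)
      sus[p].Available = true;
}

int main() {
  // Node 0 has two successors (1 and 2); it becomes available only after
  // both of them have been scheduled, as in bottom-up scheduling.
  std::vector<MiniSU> sus{{2, false}, {0, true}, {0, true}};
  release(sus, {0}); // schedule node 1: releases one edge into node 0
  assert(!sus[0].Available);
  release(sus, {0}); // schedule node 2: node 0 now available
  assert(sus[0].Available);
}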
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it
+/// to the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(SU->dump(this));
+
+  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+  SU->setHeightToAtLeast(CurCycle);
+  Sequence.push_back(SU);
+
+  ReleasePredecessors(SU, CurCycle);
+
+  // Release all the implicit physical register defs that are live.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        assert(LiveRegDefs[I->getReg()] == SU &&
+               "Physical register dependency violated?");
+        --NumLiveRegs;
+        LiveRegDefs[I->getReg()] = NULL;
+        LiveRegCycles[I->getReg()] = 0;
+      }
+    }
+  }
+
+  SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+  if (SU->getNode()->getGluedNode())
+    return NULL;
+
+  SDNode *N = SU->getNode();
+  if (!N)
+    return NULL;
+
+  SUnit *NewSU;
+  bool TryUnfold = false;
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+    EVT VT = N->getValueType(i);
+    if (VT == MVT::Glue)
+      return NULL;
+    else if (VT == MVT::Other)
+      TryUnfold = true;
+  }
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDValue &Op = N->getOperand(i);
+    EVT VT = Op.getNode()->getValueType(Op.getResNo());
+    if (VT == MVT::Glue)
+      return NULL;
+  }
+
+  if (TryUnfold) {
+    SmallVector<SDNode*, 2> NewNodes;
+    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+      return NULL;
+
+    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+    assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+    N = NewNodes[1];
+    SDNode *LoadNode = NewNodes[0];
+    unsigned NumVals = N->getNumValues();
+    unsigned OldNumVals = SU->getNode()->getNumValues();
+    for (unsigned i = 0; i != NumVals; ++i)
+      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+                                   SDValue(LoadNode, 1));
+
+    NewSU = newSUnit(N);
+    assert(N->getNodeId() == -1 && "Node already inserted!");
+    N->setNodeId(NewSU->NodeNum);
+
+    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+        NewSU->isTwoAddress = true;
+        break;
+      }
+    }
+    if (MCID.isCommutable())
+      NewSU->isCommutable = true;
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = newSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + } + + SDep ChainPred; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPred = *I; + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + if (ChainPred.getSUnit()) { + RemovePred(SU, ChainPred); + if (isNewLoad) + AddPred(LoadSU, ChainPred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) { + AddPred(LoadSU, Pred); + } + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + if (isNewLoad) { + SDep D(LoadSU, SDep::Barrier); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); + } + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); + NewSU = Clone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector<SUnit*, 2> &Copies) { + SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL)); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + RemovePred(DelDeps[i].first, DelDeps[i].second); + } + SDep FromDep(SU, SDep::Data, Reg); + FromDep.setLatency(SU->Latency); + AddPred(CopyFromSU, FromDep); + SDep ToDep(CopyFromSU, SDep::Data, 0); + ToDep.setLatency(CopyFromSU->Latency); + AddPred(CopyToSU, ToDep); + + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = MCID.getNumDefs(); + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { + if (RegAdded.insert(*AI)) { + LRegs.push_back(*AI); + Added = true; + } + } + } + return Added; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector<unsigned, 4> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { + // Check for def of register or earlyclobber register. 
+          for (; NumVals; --NumVals, ++i) {
+            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+            if (TargetRegisterInfo::isPhysicalRegister(Reg))
+              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+          }
+        } else
+          i += NumVals;
+      }
+      continue;
+    }
+
+    if (!Node->isMachineOpcode())
+      continue;
+    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+    if (!MCID.ImplicitDefs)
+      continue;
+    for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
+      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+    }
+  }
+  return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+  unsigned CurCycle = 0;
+
+  // Release any predecessors of the special Exit node.
+  ReleasePredecessors(&ExitSU, CurCycle);
+
+  // Add the root to the Available queue.
+  if (!SUnits.empty()) {
+    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+    RootSU->isAvailable = true;
+    AvailableQueue.push(RootSU);
+  }
+
+  // While the Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready, put it back. Schedule the node.
+  SmallVector<SUnit*, 4> NotReady;
+  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue.empty()) {
+    bool Delayed = false;
+    LRegsMap.clear();
+    SUnit *CurSU = AvailableQueue.pop();
+    while (CurSU) {
+      SmallVector<unsigned, 4> LRegs;
+      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+        break;
+      Delayed = true;
+      LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+      CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+      NotReady.push_back(CurSU);
+      CurSU = AvailableQueue.pop();
+    }
+
+    // All candidates are delayed due to live physical reg dependencies.
+    // Try code duplication or inserting cross class copies
+    // to resolve it.
+    if (Delayed && !CurSU) {
+      if (!CurSU) {
+        // Try duplicating the nodes that produce these
+        // "expensive to copy" values to break the dependency. In case even
+        // that doesn't work, insert cross class copies.
+        SUnit *TrySU = NotReady[0];
+        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+        assert(LRegs.size() == 1 && "Can't handle this yet!");
+        unsigned Reg = LRegs[0];
+        SUnit *LRDef = LiveRegDefs[Reg];
+        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getMinimalPhysRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If the cross-copy register class is the same as RC, then it must
+        // be possible to copy the value directly. Do not try to duplicate
+        // the def.
+        // If the cross-copy register class is not the same as RC, then it
+        // is possible to copy the value, but it requires cross-register-class
+        // copies and is expensive.
+        // If the cross-copy register class is null, then it is not possible
+        // to copy the value at all.
+        SUnit *NewDef = 0;
+        if (DestRC != RC) {
+          NewDef = CopyAndMoveSuccessors(LRDef);
+          if (!DestRC && !NewDef)
+            report_fatal_error("Can't handle live physical "
+                               "register dependency!");
+        }
+        if (!NewDef) {
+          // Issue copies; these can be expensive cross-register-class copies.
+          SmallVector<SUnit*, 2> Copies;
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+                       << " to SU #" << Copies.front()->NodeNum << "\n");
+          AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+          NewDef = Copies.back();
+        }
+
+        DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+                     << " to SU #" << TrySU->NodeNum << "\n");
+        LiveRegDefs[Reg] = NewDef;
+        AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+        TrySU->isAvailable = false;
+        CurSU = NewDef;
+      }
+
+      if (!CurSU) {
+        llvm_unreachable("Unable to resolve live physical register dependencies!");
+      }
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+      NotReady[i]->isPending = false;
+      // May no longer be available due to backtracking.
+      if (NotReady[i]->isAvailable)
+        AvailableQueue.push(NotReady[i]);
+    }
+    NotReady.clear();
+
+    if (CurSU)
+      ScheduleNodeBottomUp(CurSU, CurCycle);
+    ++CurCycle;
+  }
+
+  // Reverse the order since it is bottom up.
+  std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+  VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+
+namespace {
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - A no-op "scheduler" that simply linearizes the DAG
+// in topological order.
+// IMPORTANT: this may not work for targets with physreg dependencies.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+  ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+  void Schedule();
+
+  MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+private:
+  std::vector<SDNode*> Sequence;
+  DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user.
+
+  void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+  if (N->getNodeId() != 0)
+    llvm_unreachable(0);
+
+  if (!N->isMachineOpcode() &&
+      (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+    // These nodes do not need to be translated into MIs.
+    return;
+
+  DEBUG(dbgs() << "\n*** Scheduling: ");
+  DEBUG(N->dump(DAG));
+  Sequence.push_back(N);
+
+  unsigned NumOps = N->getNumOperands();
+  if (unsigned NumLeft = NumOps) {
+    SDNode *GluedOpN = 0;
+    do {
+      const SDValue &Op = N->getOperand(NumLeft-1);
+      SDNode *OpN = Op.getNode();
+
+      if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+        // Schedule the glue operand right above N.
+        GluedOpN = OpN;
+        assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+        OpN->setNodeId(0);
+        ScheduleNode(OpN);
+        continue;
+      }
+
+      if (OpN == GluedOpN)
+        // The glue operand is already scheduled.
+        continue;
+
+      DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+      if (DI != GluedMap.end() && DI->second != N)
+        // Users of glues are counted against the glued users.
+        OpN = DI->second;
+
+      unsigned Degree = OpN->getNodeId();
+      assert(Degree > 0 && "Predecessor over-released!");
+      OpN->setNodeId(--Degree);
+      if (Degree == 0)
+        ScheduleNode(OpN);
+    } while (--NumLeft);
+  }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) { + while (SDNode *Glued = N->getGluedUser()) + N = Glued; + return N; +} + +void ScheduleDAGLinearize::Schedule() { + DEBUG(dbgs() << "********** DAG Linearization **********\n"); + + SmallVector<SDNode*, 8> Glues; + unsigned DAGSize = 0; + for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(), + E = DAG->allnodes_end(); I != E; ++I) { + SDNode *N = I; + + // Use node id to record degree. + unsigned Degree = N->use_size(); + N->setNodeId(Degree); + unsigned NumVals = N->getNumValues(); + if (NumVals && N->getValueType(NumVals-1) == MVT::Glue && + N->hasAnyUseOfValue(NumVals-1)) { + SDNode *User = findGluedUser(N); + if (User) { + Glues.push_back(N); + GluedMap.insert(std::make_pair(N, User)); + } + } + + if (N->isMachineOpcode() || + (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N))) + ++DAGSize; + } + + for (unsigned i = 0, e = Glues.size(); i != e; ++i) { + SDNode *Glue = Glues[i]; + SDNode *GUser = GluedMap[Glue]; + unsigned Degree = Glue->getNodeId(); + unsigned UDegree = GUser->getNodeId(); + + // Glue user must be scheduled together with the glue operand. So other + // users of the glue operand must be treated as its users. + SDNode *ImmGUser = Glue->getGluedUser(); + for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end(); + ui != ue; ++ui) + if (*ui == ImmGUser) + --Degree; + GUser->setNodeId(UDegree + Degree); + Glue->setNodeId(1); + } + + Sequence.reserve(DAGSize); + ScheduleNode(DAG->getRoot().getNode()); +} + +MachineBasicBlock* +ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + + DEBUG({ + dbgs() << "\n*** Final schedule ***\n"; + }); + + // FIXME: Handle dbg_values. + unsigned NumNodes = Sequence.size(); + for (unsigned i = 0; i != NumNodes; ++i) { + SDNode *N = Sequence[NumNodes-i-1]; + DEBUG(N->dump(DAG)); + Emitter.EmitNode(N, false, false, VRBaseMap); + } + + DEBUG(dbgs() << '\n'); + + InsertPos = Emitter.getInsertPos(); + return Emitter.getBlock(); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGFast(*IS->MF); +} + +llvm::ScheduleDAGSDNodes * +llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGLinearize(*IS->MF); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 0000000..c009cfc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,3039 @@ +//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms. The basic approach uses a priority +// queue of available nodes to schedule. One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <climits> +using namespace llvm; + +STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical register copies"); + +static RegisterScheduler + burrListDAGScheduler("list-burr", + "Bottom-up register reduction list scheduling", + createBURRListDAGScheduler); +static RegisterScheduler + sourceListDAGScheduler("source", + "Similar to list-burr but schedules in source " + "order when possible", + createSourceListDAGScheduler); + +static RegisterScheduler + hybridListDAGScheduler("list-hybrid", + "Bottom-up register pressure aware list scheduling " + "which tries to balance latency and register pressure", + createHybridListDAGScheduler); + +static RegisterScheduler + ILPListDAGScheduler("list-ilp", + "Bottom-up register pressure aware list scheduling " + "which tries to balance ILP and register pressure", + createILPListDAGScheduler); + +static cl::opt<bool> DisableSchedCycles( + "disable-sched-cycles", cl::Hidden, cl::init(false), + cl::desc("Disable cycle-level precision during preRA scheduling")); + +// Temporary sched=list-ilp flags until the heuristics are robust. +// Some options are also available under sched=list-hybrid. 
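+// For example (an illustrative invocation, not from this file), the ILP
+// scheduler can be selected and tuned from the llc command line:
+//   llc -pre-RA-sched=list-ilp -max-sched-reorder=4 -disable-sched-height foo.ll
+// The flag definitions below are the authoritative list.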
+static cl::opt<bool> DisableSchedRegPressure(
+  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+  cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+  "disable-sched-live-uses", cl::Hidden, cl::init(true),
+  cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+  cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+  cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+  "disable-sched-stalls", cl::Hidden, cl::init(true),
+  cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+  "disable-sched-critical-path", cl::Hidden, cl::init(false),
+  cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+  "disable-sched-height", cl::Hidden, cl::init(false),
+  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+  "disable-2addr-hack", cl::Hidden, cl::init(true),
+  cl::desc("Disable scheduler's two-address hack"));
+
+static cl::opt<int> MaxReorderWindow(
+  "max-sched-reorder", cl::Hidden, cl::init(6),
+  cl::desc("Number of instructions to allow ahead of the critical path "
+           "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+  "sched-avg-ipc", cl::Hidden, cl::init(1),
+  cl::desc("Average inst/cycle when no target itinerary exists."));
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation.  This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+  /// NeedLatency - True if the scheduler will make use of latency information.
+  ///
+  bool NeedLatency;
+
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  SchedulingPriorityQueue *AvailableQueue;
+
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation).  Once the operands become available, the instruction is
+  /// added to the AvailableQueue.
+  std::vector<SUnit*> PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use.
+  ScheduleHazardRecognizer *HazardRec;
+
+  /// CurCycle - The current scheduler state corresponds to this cycle.
+  unsigned CurCycle;
+
+  /// MinAvailableCycle - Cycle of the soonest available instruction.
+  unsigned MinAvailableCycle;
+
+  /// IssueCount - Count of instructions issued in this cycle.
+  /// Currently valid only for bottom-up scheduling.
+  unsigned IssueCount;
+
+  /// LiveRegDefs - A set of physical registers and their definitions that are
+  /// "live". These nodes must be scheduled before any other node that
+  /// modifies the registers can be scheduled.
+  unsigned NumLiveRegs;
+  std::vector<SUnit*> LiveRegDefs;
+  std::vector<SUnit*> LiveRegGens;
+
+  // Collect interferences between physical register use/defs.
+  // Each interference is an SUnit and set of physical registers.
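+  // For illustration (hypothetical registers): if an SUnit cannot be
+  // scheduled because the live definitions of, say, EFLAGS and EAX would be
+  // clobbered, it is parked in Interferences and LRegsMap records
+  // {EFLAGS, EAX} for it; releaseInterferences() re-queues it once one of
+  // those registers is freed.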
+  SmallVector<SUnit*, 4> Interferences;
+  typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
+  LRegsMapT LRegsMap;
+
+  /// Topo - A topological ordering for SUnits which permits fast IsReachable
+  /// and similar queries.
+  ScheduleDAGTopologicalSort Topo;
+
+  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+  // DAG crawling.
+  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
+public:
+  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+                    SchedulingPriorityQueue *availqueue,
+                    CodeGenOpt::Level OptLevel)
+    : ScheduleDAGSDNodes(mf),
+      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+      Topo(SUnits, NULL) {
+
+    const TargetMachine &tm = mf.getTarget();
+    if (DisableSchedCycles || !NeedLatency)
+      HazardRec = new ScheduleHazardRecognizer();
+    else
+      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+  }
+
+  ~ScheduleDAGRRList() {
+    delete HazardRec;
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
+  /// IsReachable - Checks if SU is reachable from TargetSU.
+  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+    return Topo.IsReachable(SU, TargetSU);
+  }
+
+  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+  /// create a cycle.
+  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+    return Topo.WillCreateCycle(SU, TargetSU);
+  }
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+  void AddPred(SUnit *SU, const SDep &D) {
+    Topo.AddPred(SU, D.getSUnit());
+    SU->addPred(D);
+  }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
+  void RemovePred(SUnit *SU, const SDep &D) {
+    Topo.RemovePred(SU, D.getSUnit());
+    SU->removePred(D);
+  }
+
+private:
+  bool isReady(SUnit *SU) {
+    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+           AvailableQueue->isReady(SU);
+  }
+
+  void ReleasePred(SUnit *SU, const SDep *PredEdge);
+  void ReleasePredecessors(SUnit *SU);
+  void ReleasePending();
+  void AdvanceToCycle(unsigned NextCycle);
+  void AdvancePastStalls(SUnit *SU);
+  void EmitNode(SUnit *SU);
+  void ScheduleNodeBottomUp(SUnit*);
+  void CapturePred(SDep *PredEdge);
+  void UnscheduleNodeBottomUp(SUnit*);
+  void RestoreHazardCheckerBottomUp();
+  void BacktrackBottomUp(SUnit*, SUnit*);
+  SUnit *CopyAndMoveSuccessors(SUnit*);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVector<SUnit*, 2>&);
+  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+
+  void releaseInterferences(unsigned Reg = 0);
+
+  SUnit *PickNodeToScheduleBottomUp();
+  void ListScheduleBottomUp();
+
+  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+  /// Updates the topological ordering if required.
+  SUnit *CreateNewSUnit(SDNode *N) {
+    unsigned NumSUnits = SUnits.size();
+    SUnit *NewNode = newSUnit(N);
+    // Update the topological ordering.
+    if (NewNode->NodeNum >= NumSUnits)
+      Topo.InitDAGTopologicalSorting();
+    return NewNode;
+  }
+
+  /// CreateClone - Creates a new SUnit from an existing one.
+  /// Updates the topological ordering if required.
+  SUnit *CreateClone(SUnit *N) {
+    unsigned NumSUnits = SUnits.size();
+    SUnit *NewNode = Clone(N);
+    // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// need actual latency information but the hybrid scheduler does. + bool forceUnitLatencies() const { + return !NeedLatency; + } +}; +} // end anonymous namespace + +/// GetCostForDef - Looks up the register class and cost for a given definition. +/// Typically this just means looking up the representative register class, +/// but for untyped values (MVT::Untyped) it means inspecting the node's +/// opcode to determine what register class is being generated. +static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + unsigned &RegClass, unsigned &Cost, + const MachineFunction &MF) { + MVT VT = RegDefPos.GetValue(); + + // Special handling for untyped values. These values can only come from + // the expansion of custom DAG-to-DAG patterns. + if (VT == MVT::Untyped) { + const SDNode *Node = RegDefPos.GetNode(); + + // Special handling for CopyFromReg of untyped values. + if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Opcode = Node->getMachineOpcode(); + if (Opcode == TargetOpcode::REG_SEQUENCE) { + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Idx = RegDefPos.GetIdx(); + const MCInstrDesc Desc = TII->get(Opcode); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); + RegClass = RC->getID(); + // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a + // better way to determine it. + Cost = 1; + } else { + RegClass = TLI->getRepRegClassFor(VT)->getID(); + Cost = TLI->getRepRegClassCostFor(VT); + } +} + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGRRList::Schedule() { + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " '" << BB->getName() << "' **********\n"); + + CurCycle = 0; + IssueCount = 0; + MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; + NumLiveRegs = 0; + // Allocate slots for each physical register, plus one for a special register + // to track the virtual resource of a calling sequence. + LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); + LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); + CallSeqEndForStart.clear(); + assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); + + // Build the scheduling graph. + BuildSchedGraph(NULL); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + Topo.InitDAGTopologicalSorting(); + + AvailableQueue->initNodes(SUnits); + + HazardRec->Reset(); + + // Execute the actual scheduling loop. + ListScheduleBottomUp(); + + AvailableQueue->releaseState(); + + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. 
Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    PredSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --PredSU->NumSuccsLeft;
+
+  if (!forceUnitLatencies()) {
+    // Update the predecessor's height. This is now the cycle when the
+    // predecessor can be scheduled without causing a pipeline stall.
+    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+  }
+
+  // If all the node's successors are scheduled, this node is ready
+  // to be scheduled. Ignore the special EntrySU node.
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+    PredSU->isAvailable = true;
+
+    unsigned Height = PredSU->getHeight();
+    if (Height < MinAvailableCycle)
+      MinAvailableCycle = Height;
+
+    if (isReady(PredSU)) {
+      AvailableQueue->push(PredSU);
+    }
+    // CapturePred and others may have left the node in the pending queue;
+    // avoid adding it twice.
+    else if (!PredSU->isPending) {
+      PredSU->isPending = true;
+      PendingQueue.push_back(PredSU);
+    }
+  }
+}
+
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+                             unsigned NestLevel,
+                             const TargetInstrInfo *TII) {
+  SDNode *N = Outer;
+  for (;;) {
+    if (N == Inner)
+      return true;
+    // For a TokenFactor, examine each operand. There may be multiple ways
+    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+    // most nesting in order to ensure that we find the corresponding match.
+    if (N->getOpcode() == ISD::TokenFactor) {
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
+          return true;
+      return false;
+    }
+    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+    if (N->isMachineOpcode()) {
+      if (N->getMachineOpcode() ==
+          (unsigned)TII->getCallFrameDestroyOpcode()) {
+        ++NestLevel;
+      } else if (N->getMachineOpcode() ==
+                 (unsigned)TII->getCallFrameSetupOpcode()) {
+        if (NestLevel == 0)
+          return false;
+        --NestLevel;
+      }
+    }
+    // Otherwise, find the chain and continue climbing.
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other) {
+        N = N->getOperand(i).getNode();
+        goto found_chain_operand;
+      }
+    return false;
+  found_chain_operand:;
+    if (N->getOpcode() == ISD::EntryToken)
+      return false;
+  }
+}
+
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+/// NestLevel and MaxNest are used in recursion to indicate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+                 const TargetInstrInfo *TII) {
+  for (;;) {
+    // For a TokenFactor, examine each operand. There may be multiple ways
+    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+    // most nesting in order to ensure that we find the corresponding match.
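+    // For illustration: walking the chain up from an outer call's
+    // CALLSEQ_END may pass through a complete inner call sequence:
+    //   CALLSEQ_END   (outer)  <- start, NestLevel becomes 1
+    //   CALLSEQ_END   (inner)  <- NestLevel becomes 2
+    //   CALLSEQ_BEGIN (inner)  <- NestLevel back to 1
+    //   CALLSEQ_BEGIN (outer)  <- NestLevel reaches 0: match found
+    // A TokenFactor can offer several such paths; only the one with the
+    // deepest nesting identifies the matching CALLSEQ_BEGIN.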
+    if (N->getOpcode() == ISD::TokenFactor) {
+      SDNode *Best = 0;
+      unsigned BestMaxNest = MaxNest;
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+        unsigned MyNestLevel = NestLevel;
+        unsigned MyMaxNest = MaxNest;
+        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
+                                           MyNestLevel, MyMaxNest, TII))
+          if (!Best || (MyMaxNest > BestMaxNest)) {
+            Best = New;
+            BestMaxNest = MyMaxNest;
+          }
+      }
+      assert(Best);
+      MaxNest = BestMaxNest;
+      return Best;
+    }
+    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+    if (N->isMachineOpcode()) {
+      if (N->getMachineOpcode() ==
+          (unsigned)TII->getCallFrameDestroyOpcode()) {
+        ++NestLevel;
+        MaxNest = std::max(MaxNest, NestLevel);
+      } else if (N->getMachineOpcode() ==
+                 (unsigned)TII->getCallFrameSetupOpcode()) {
+        assert(NestLevel != 0);
+        --NestLevel;
+        if (NestLevel == 0)
+          return N;
+      }
+    }
+    // Otherwise, find the chain and continue climbing.
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other) {
+        N = N->getOperand(i).getNode();
+        goto found_chain_operand;
+      }
+    return 0;
+  found_chain_operand:;
+    if (N->getOpcode() == ISD::EntryToken)
+      return 0;
+  }
+}
+
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
+  // Bottom up: release predecessors.
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    ReleasePred(SU, &*I);
+    if (I->isAssignedRegDep()) {
+      // This is a physical register dependency and it's impossible or
+      // expensive to copy the register. Make sure nothing that can
+      // clobber the register is scheduled between the predecessor and
+      // this node.
+      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+             "interference on register dependence");
+      LiveRegDefs[I->getReg()] = I->getSUnit();
+      if (!LiveRegGens[I->getReg()]) {
+        ++NumLiveRegs;
+        LiveRegGens[I->getReg()] = SU;
+      }
+    }
+  }
+
+  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+  // these nodes, to prevent other calls from being interscheduled with them.
+  unsigned CallResource = TRI->getNumRegs();
+  if (!LiveRegDefs[CallResource])
+    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
+      if (Node->isMachineOpcode() &&
+          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+        unsigned NestLevel = 0;
+        unsigned MaxNest = 0;
+        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+
+        SUnit *Def = &SUnits[N->getNodeId()];
+        CallSeqEndForStart[Def] = SU;
+
+        ++NumLiveRegs;
+        LiveRegDefs[CallResource] = Def;
+        LiveRegGens[CallResource] = SU;
+        break;
+      }
+}
+
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+  if (DisableSchedCycles) {
+    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+    return;
+  }
+
+  // If the available queue is empty, it is safe to reset MinAvailableCycle.
+  if (AvailableQueue->empty())
+    MinAvailableCycle = UINT_MAX;
+
+  // Check to see if any of the pending instructions are ready to issue. If
+  // so, add them to the available queue.
+  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+    unsigned ReadyCycle = PendingQueue[i]->getHeight();
+    if (ReadyCycle < MinAvailableCycle)
+      MinAvailableCycle = ReadyCycle;
+
+    if (PendingQueue[i]->isAvailable) {
+      if (!isReady(PendingQueue[i]))
+        continue;
+      AvailableQueue->push(PendingQueue[i]);
+    }
+    PendingQueue[i]->isPending = false;
+    PendingQueue[i] = PendingQueue.back();
+    PendingQueue.pop_back();
+    --i; --e;
+  }
+}
+
+/// Move the scheduler state forward to the specified cycle.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+  if (NextCycle <= CurCycle)
+    return;
+
+  IssueCount = 0;
+  AvailableQueue->setCurCycle(NextCycle);
+  if (!HazardRec->isEnabled()) {
+    // Bypass lots of virtual calls in case of long latency.
+    CurCycle = NextCycle;
+  }
+  else {
+    for (; CurCycle != NextCycle; ++CurCycle) {
+      HazardRec->RecedeCycle();
+    }
+  }
+  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on
+  // the available Q to release pending nodes at least once before popping.
+  ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+  if (DisableSchedCycles)
+    return;
+
+  // FIXME: Nodes such as CopyFromReg probably should not advance the current
+  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+  // has predecessors, the cycle will be advanced when they are scheduled.
+  // But given the crude nature of modeling latency through such nodes, we
+  // currently need to treat these nodes like real instructions.
+  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
+  unsigned ReadyCycle = SU->getHeight();
+
+  // Bump CurCycle to account for latency. We assume the latency of other
+  // available instructions may be hidden by the stall (not a full pipe stall).
+  // This updates the hazard recognizer's cycle before reserving resources for
+  // this instruction.
+  AdvanceToCycle(ReadyCycle);
+
+  // Calls are scheduled in their preceding cycle, so don't conflict with
+  // hazards from instructions after the call. EmitNode will reset the
+  // scoreboard state before emitting the call.
+  if (SU->isCall)
+    return;
+
+  // FIXME: For resource conflicts in very long non-pipelined stages, we
+  // should probably skip ahead here to avoid useless scoreboard checks.
+  int Stalls = 0;
+  while (true) {
+    ScheduleHazardRecognizer::HazardType HT =
+      HazardRec->getHazardType(SU, -Stalls);
+
+    if (HT == ScheduleHazardRecognizer::NoHazard)
+      break;
+
+    ++Stalls;
+  }
+  AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+  if (!HazardRec->isEnabled())
+    return;
+
+  // Check for phys reg copy.
+  if (!SU->getNode())
+    return;
+
+  switch (SU->getNode()->getOpcode()) {
+  default:
+    assert(SU->getNode()->isMachineOpcode() &&
+           "This target-independent node should not be scheduled.");
+    break;
+  case ISD::MERGE_VALUES:
+  case ISD::TokenFactor:
+  case ISD::LIFETIME_START:
+  case ISD::LIFETIME_END:
+  case ISD::CopyToReg:
+  case ISD::CopyFromReg:
+  case ISD::EH_LABEL:
+    // Noops don't affect the scoreboard state. Copies are likely to be
+    // removed.
+    return;
+  case ISD::INLINEASM:
+    // For inline asm, clear the pipeline state.
+    HazardRec->Reset();
+    return;
+  }
+  if (SU->isCall) {
+    // Calls are scheduled with their preceding instructions. For bottom-up
+    // scheduling, clear the pipeline state before emitting.
+    HazardRec->Reset();
+  }
+
+  HazardRec->EmitInstruction(SU);
+}
+
+static void resetVRegCycle(SUnit *SU);
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
+  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(SU->dump(this));
+
+#ifndef NDEBUG
+  if (CurCycle < SU->getHeight())
+    DEBUG(dbgs() << "   Height [" << SU->getHeight()
+          << "] pipeline stall!\n");
+#endif
+
+  // FIXME: Do not modify node height. It may interfere with
+  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+  // node, its ready cycle can aid heuristics, and after scheduling it can
+  // indicate the scheduled cycle.
+  SU->setHeightToAtLeast(CurCycle);
+
+  // Reserve resources for the scheduled instruction.
+  EmitNode(SU);
+
+  Sequence.push_back(SU);
+
+  AvailableQueue->scheduledNode(SU);
+
+  // If HazardRec is disabled, and each inst counts as one cycle, then
+  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+  // PendingQueue for schedulers that implement HasReadyFilter.
+  if (!HazardRec->isEnabled() && AvgIPC < 2)
+    AdvanceToCycle(CurCycle + 1);
+
+  // Update liveness of predecessors before successors to avoid treating a
+  // two-address node as a live range def.
+  ReleasePredecessors(SU);
+
+  // Release all the implicit physical register defs that are live.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
+    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+      --NumLiveRegs;
+      LiveRegDefs[I->getReg()] = NULL;
+      LiveRegGens[I->getReg()] = NULL;
+      releaseInterferences(I->getReg());
+    }
+  }
+  // Release the special call resource dependence, if this is the beginning
+  // of a call.
+  unsigned CallResource = TRI->getNumRegs();
+  if (LiveRegDefs[CallResource] == SU)
+    for (const SDNode *SUNode = SU->getNode(); SUNode;
+         SUNode = SUNode->getGluedNode()) {
+      if (SUNode->isMachineOpcode() &&
+          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        --NumLiveRegs;
+        LiveRegDefs[CallResource] = NULL;
+        LiveRegGens[CallResource] = NULL;
+        releaseInterferences(CallResource);
+      }
+    }
+
+  resetVRegCycle(SU);
+
+  SU->isScheduled = true;
+
+  // Conditions under which the scheduler should eagerly advance the cycle:
+  // (1) No available instructions
+  // (2) All pipelines full, so available instructions must have hazards.
+  //
+  // If HazardRec is disabled, the cycle was pre-advanced before calling
+  // ReleasePredecessors. In that case, IssueCount should remain 0.
+  //
+  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+  if (HazardRec->isEnabled() || AvgIPC > 1) {
+    if (SU->getNode() && SU->getNode()->isMachineOpcode())
+      ++IssueCount;
+    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+      AdvanceToCycle(CurCycle + 1);
+  }
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+  if (PredSU->isAvailable) {
+    PredSU->isAvailable = false;
+    if (!PredSU->isPending)
+      AvailableQueue->remove(PredSU);
+  }
+
+  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+  ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
+/// state and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+  DEBUG(SU->dump(this));
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    CapturePred(&*I);
+    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]) {
+      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+             "Physical register dependency violated?");
+      --NumLiveRegs;
+      LiveRegDefs[I->getReg()] = NULL;
+      LiveRegGens[I->getReg()] = NULL;
+      releaseInterferences(I->getReg());
+    }
+  }
+
+  // Reclaim the special call resource dependence, if this is the beginning
+  // of a call.
+  unsigned CallResource = TRI->getNumRegs();
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getGluedNode()) {
+    if (SUNode->isMachineOpcode() &&
+        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+      ++NumLiveRegs;
+      LiveRegDefs[CallResource] = SU;
+      LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+    }
+  }
+
+  // Release the special call resource dependence, if this is the end
+  // of a call.
+  if (LiveRegGens[CallResource] == SU)
+    for (const SDNode *SUNode = SU->getNode(); SUNode;
+         SUNode = SUNode->getGluedNode()) {
+      if (SUNode->isMachineOpcode() &&
+          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        --NumLiveRegs;
+        LiveRegDefs[CallResource] = NULL;
+        LiveRegGens[CallResource] = NULL;
+        releaseInterferences(CallResource);
+      }
+    }
+
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      if (!LiveRegDefs[I->getReg()])
+        ++NumLiveRegs;
+      // This becomes the nearest def. Note that an earlier def may still be
+      // pending if this is a two-address node.
+      LiveRegDefs[I->getReg()] = SU;
+      if (LiveRegGens[I->getReg()] == NULL ||
+          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+        LiveRegGens[I->getReg()] = I->getSUnit();
+    }
+  }
+  if (SU->getHeight() < MinAvailableCycle)
+    MinAvailableCycle = SU->getHeight();
+
+  SU->setHeightDirty();
+  SU->isScheduled = false;
+  SU->isAvailable = true;
+  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+    // Don't make available until backtracking is complete.
+    SU->isPending = true;
+    PendingQueue.push_back(SU);
+  }
+  else {
+    AvailableQueue->push(SU);
+  }
+  AvailableQueue->unscheduledNode(SU);
+}
+
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+  HazardRec->Reset();
+
+  unsigned LookAhead = std::min((unsigned)Sequence.size(),
+                                HazardRec->getMaxLookAhead());
+  if (LookAhead == 0)
+    return;
+
+  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+  unsigned HazardCycle = (*I)->getHeight();
+  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+    SUnit *SU = *I;
+    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+      HazardRec->RecedeCycle();
+    }
+    EmitNode(SU);
+  }
+}
+
+/// BacktrackBottomUp - Backtrack scheduling to an earlier cycle, unscheduling
+/// nodes back to (and including) BtSU, in order to make it possible to
+/// schedule a specific node.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+  SUnit *OldSU = Sequence.back();
+  while (true) {
+    Sequence.pop_back();
+    // FIXME: use ready cycle instead of height
+    CurCycle = OldSU->getHeight();
+    UnscheduleNodeBottomUp(OldSU);
+    AvailableQueue->setCurCycle(CurCycle);
+    if (OldSU == BtSU)
+      break;
+    OldSU = Sequence.back();
+  }
+
+  assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+  RestoreHazardCheckerBottomUp();
+
+  ReleasePending();
+
+  ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getGluedNode()) {
+    if (SUNode->isOperandOf(N))
+      return true;
+  }
+  return false;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+  SDNode *N = SU->getNode();
+  if (!N)
+    return NULL;
+
+  if (SU->getNode()->getGluedNode())
+    return NULL;
+
+  SUnit *NewSU;
+  bool TryUnfold = false;
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+    EVT VT = N->getValueType(i);
+    if (VT == MVT::Glue)
+      return NULL;
+    else if (VT == MVT::Other)
+      TryUnfold = true;
+  }
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDValue &Op = N->getOperand(i);
+    EVT VT = Op.getNode()->getValueType(Op.getResNo());
+    if (VT == MVT::Glue)
+      return NULL;
+  }
+
+  if (TryUnfold) {
+    SmallVector<SDNode*, 2> NewNodes;
+    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+      return NULL;
+
+    // Unfolding an x86 DEC64m operation results in a store, a dec, and a
+    // load, which can't be handled here, so quit.
+    if (NewNodes.size() == 3)
+      return NULL;
+
+    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+    assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+    N = NewNodes[1];
+    SDNode *LoadNode = NewNodes[0];
+    unsigned NumVals = N->getNumValues();
+    unsigned OldNumVals = SU->getNode()->getNumValues();
+    for (unsigned i = 0; i != NumVals; ++i)
+      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+                                   SDValue(LoadNode, 1));
+
+    // LoadNode may already exist. This can happen when there is another
+    // load from the same location that produces the same type of value
+    // but with different alignment or volatility.
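+    // For illustration (hypothetical IR): two loads of the same address, one
+    // of which is volatile, are not CSE'd by the DAG, so unfolding can
+    // produce a load node that matches one already assigned to an SUnit.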
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + + InitNumRegDefsLeft(LoadSU); + computeLatency(LoadSU); + } + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); + computeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector<SDep, 4> ChainPreds; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPreds.push_back(*I); + else if (isOperandOf(I->getSUnit(), LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + // Now assign edges to the newly-created nodes. + for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { + const SDep &Pred = ChainPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + // Balance register pressure. + if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled + && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) + --NewSU->NumRegDefsLeft; + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + SDep D(LoadSU, SDep::Data, 0); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); + NewSU = CreateClone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
+  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isArtificial())
+      continue;
+    SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->isScheduled) {
+      SDep D = *I;
+      D.setSUnit(NewSU);
+      AddPred(SuccSU, D);
+      D.setSUnit(SU);
+      DelDeps.push_back(std::make_pair(SuccSU, D));
+    }
+  }
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+    RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+  AvailableQueue->updateNode(SU);
+  AvailableQueue->addNode(NewSU);
+
+  ++NumDups;
+  return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+                                                 const TargetRegisterClass *DestRC,
+                                                 const TargetRegisterClass *SrcRC,
+                                                 SmallVector<SUnit*, 2> &Copies) {
+  SUnit *CopyFromSU = CreateNewSUnit(NULL);
+  CopyFromSU->CopySrcRC = SrcRC;
+  CopyFromSU->CopyDstRC = DestRC;
+
+  SUnit *CopyToSU = CreateNewSUnit(NULL);
+  CopyToSU->CopySrcRC = DestRC;
+  CopyToSU->CopyDstRC = SrcRC;
+
+  // Only copy scheduled successors. Cut them from old node's successor
+  // list and move them over.
+  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isArtificial())
+      continue;
+    SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->isScheduled) {
+      SDep D = *I;
+      D.setSUnit(CopyToSU);
+      AddPred(SuccSU, D);
+      DelDeps.push_back(std::make_pair(SuccSU, *I));
+    }
+    else {
+      // Avoid scheduling the def-side copy before other successors. Otherwise
+      // we could introduce another physreg interference on the copy and
+      // continue inserting copies indefinitely.
+      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+    }
+  }
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+    RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+  SDep FromDep(SU, SDep::Data, Reg);
+  FromDep.setLatency(SU->Latency);
+  AddPred(CopyFromSU, FromDep);
+  SDep ToDep(CopyFromSU, SDep::Data, 0);
+  ToDep.setLatency(CopyFromSU->Latency);
+  AddPred(CopyToSU, ToDep);
+
+  AvailableQueue->updateNode(SU);
+  AvailableQueue->addNode(CopyFromSU);
+  AvailableQueue->addNode(CopyToSU);
+  Copies.push_back(CopyFromSU);
+  Copies.push_back(CopyToSU);
+
+  ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+                                 const TargetInstrInfo *TII) {
+  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+  unsigned NumRes = MCID.getNumDefs();
+  for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+    if (Reg == *ImpDef)
+      break;
+    ++NumRes;
+  }
+  return N->getValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Update the live register vector if the specified
+/// register def of the specified SUnit clobbers any "live" registers.
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+                               std::vector<SUnit*> &LiveRegDefs,
+                               SmallSet<unsigned, 4> &RegAdded,
+                               SmallVector<unsigned, 4> &LRegs,
+                               const TargetRegisterInfo *TRI) {
+  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
+
+    // Check if Reg is live.
+    if (!LiveRegDefs[*AliasI]) continue;
+
+    // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AliasI] == SU) continue; + + // Add Reg to the set of interfering live regs. + if (RegAdded.insert(*AliasI)) { + LRegs.push_back(*AliasI); + } + } +} + +/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered +/// by RegMask, and add them to LRegs. +static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs) { + // Look at all live registers. Skip Reg0 and the special CallResource. + for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { + if (!LiveRegDefs[i]) continue; + if (LiveRegDefs[i] == SU) continue; + if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; + if (RegAdded.insert(i)) + LRegs.push_back(i); + } +} + +/// getNodeRegMask - Returns the register mask attached to an SDNode, if any. +static const uint32_t *getNodeRegMask(const SDNode *N) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (const RegisterMaskSDNode *Op = + dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode())) + return Op->getRegMask(); + return NULL; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGRRList:: +DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + // + // If SU is the currently live definition of the same register that it uses, + // then we are free to schedule it. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + + if (!Node->isMachineOpcode()) + continue; + // If we're in the middle of scheduling a call, don't begin scheduling + // another call. Also, don't allow any physical registers to be live across + // the call. + if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + // Check the special calling-sequence resource. 
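+      // The calling sequence is modeled as one extra "virtual" register slot
+      // at index TRI->getNumRegs(), just past the real registers (see the
+      // LiveRegDefs allocation in Schedule()). A live CallResource therefore
+      // means an unfinished call sequence is in flight.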
+      unsigned CallResource = TRI->getNumRegs();
+      if (LiveRegDefs[CallResource]) {
+        SDNode *Gen = LiveRegGens[CallResource]->getNode();
+        while (SDNode *Glued = Gen->getGluedNode())
+          Gen = Glued;
+        if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
+          LRegs.push_back(CallResource);
+      }
+    }
+    if (const uint32_t *RegMask = getNodeRegMask(Node))
+      CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+
+    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+    if (!MCID.ImplicitDefs)
+      continue;
+    for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
+      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+  }
+
+  return !LRegs.empty();
+}
+
+void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
+  // Add the nodes that aren't ready back onto the available list.
+  for (unsigned i = Interferences.size(); i > 0; --i) {
+    SUnit *SU = Interferences[i-1];
+    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
+    if (Reg) {
+      SmallVector<unsigned, 4> &LRegs = LRegsPos->second;
+      if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
+        continue;
+    }
+    SU->isPending = false;
+    // The interfering node may no longer be available due to backtracking.
+    // Furthermore, it may have been made available again, in which case it is
+    // now already in the AvailableQueue.
+    if (SU->isAvailable && !SU->NodeQueueId) {
+      DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
+      AvailableQueue->push(SU);
+    }
+    if (i < Interferences.size())
+      Interferences[i-1] = Interferences.back();
+    Interferences.pop_back();
+    LRegsMap.erase(LRegsPos);
+  }
+}
+
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+  SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop();
+  while (CurSU) {
+    SmallVector<unsigned, 4> LRegs;
+    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+      break;
+    DEBUG(dbgs() << "    Interfering reg " <<
+          (LRegs[0] == TRI->getNumRegs() ? "CallResource"
+           : TRI->getName(LRegs[0]))
+          << " SU #" << CurSU->NodeNum << '\n');
+    std::pair<LRegsMapT::iterator, bool> LRegsPair =
+      LRegsMap.insert(std::make_pair(CurSU, LRegs));
+    if (LRegsPair.second) {
+      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
+      Interferences.push_back(CurSU);
+    }
+    else {
+      assert(CurSU->isPending && "Interferences are pending");
+      // Update the interference with current live regs.
+      LRegsPair.first->second = LRegs;
+    }
+    CurSU = AvailableQueue->pop();
+  }
+  if (CurSU)
+    return CurSU;
+
+  // All candidates are delayed due to live physical reg dependencies.
+  // Try backtracking, code duplication, or inserting cross class copies
+  // to resolve it.
+  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+    SUnit *TrySU = Interferences[i];
+    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+    // Try unscheduling up to the point where it's safe to schedule
+    // this node.
+    SUnit *BtSU = NULL;
+    unsigned LiveCycle = UINT_MAX;
+    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+      unsigned Reg = LRegs[j];
+      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+        BtSU = LiveRegGens[Reg];
+        LiveCycle = BtSU->getHeight();
+      }
+    }
+    if (!WillCreateCycle(TrySU, BtSU)) {
+      // BacktrackBottomUp mutates Interferences!
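+      // Adding the artificial edge between BtSU and TrySU below is only safe
+      // if it does not close a cycle in the dependence graph; WillCreateCycle
+      // uses the maintained topological ordering to rule that out first.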
+      BacktrackBottomUp(TrySU, BtSU);
+
+      // Force the current node to be scheduled before the node that
+      // requires the physical reg dep.
+      if (BtSU->isAvailable) {
+        BtSU->isAvailable = false;
+        if (!BtSU->isPending)
+          AvailableQueue->remove(BtSU);
+      }
+      DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
+            << TrySU->NodeNum << ")\n");
+      AddPred(TrySU, SDep(BtSU, SDep::Artificial));
+
+      // If one or more successors has been unscheduled, then the current
+      // node is no longer available.
+      if (!TrySU->isAvailable)
+        CurSU = AvailableQueue->pop();
+      else {
+        AvailableQueue->remove(TrySU);
+        CurSU = TrySU;
+      }
+      // Interferences has been mutated. We must break.
+      break;
+    }
+  }
+
+  if (!CurSU) {
+    // Can't backtrack. If it's too expensive to copy the value, then try to
+    // duplicate the nodes that produce these "too expensive to copy"
+    // values to break the dependency. In case even that doesn't work,
+    // insert cross class copies.
+    // If it's not too expensive, i.e. cost != -1, issue copies.
+    SUnit *TrySU = Interferences[0];
+    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+    assert(LRegs.size() == 1 && "Can't handle this yet!");
+    unsigned Reg = LRegs[0];
+    SUnit *LRDef = LiveRegDefs[Reg];
+    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+    const TargetRegisterClass *RC =
+      TRI->getMinimalPhysRegClass(Reg, VT);
+    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If the cross copy register class is the same as RC, then it must be
+    // possible to copy the value directly. Do not try to duplicate the def.
+    // If the cross copy register class is not the same as RC, then it is
+    // possible to copy the value, but it requires cross register class copies
+    // and it is expensive.
+    // If the cross copy register class is null, then it is not possible to
+    // copy the value at all.
+    SUnit *NewDef = 0;
+    if (DestRC != RC) {
+      NewDef = CopyAndMoveSuccessors(LRDef);
+      if (!DestRC && !NewDef)
+        report_fatal_error("Can't handle live physical register dependency!");
+    }
+    if (!NewDef) {
+      // Issue copies, these can be expensive cross register class copies.
+      SmallVector<SUnit*, 2> Copies;
+      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
+            << " to SU #" << Copies.front()->NodeNum << "\n");
+      AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+      NewDef = Copies.back();
+    }
+
+    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
+          << " to SU #" << TrySU->NodeNum << "\n");
+    LiveRegDefs[Reg] = NewDef;
+    AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+    TrySU->isAvailable = false;
+    CurSU = NewDef;
+  }
+  assert(CurSU && "Unable to resolve live physical register dependencies!");
+  return CurSU;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+  // Release any predecessors of the special Exit node.
+  ReleasePredecessors(&ExitSU);
+
+  // Add root to Available queue.
+  if (!SUnits.empty()) {
+    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+    RootSU->isAvailable = true;
+    AvailableQueue->push(RootSU);
+  }
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready, put it back. Schedule the node.
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue->empty() || !Interferences.empty()) {
+    DEBUG(dbgs() << "\nExamining Available:\n";
+          AvailableQueue->dump(this));
+
+    // Pick the best node to schedule taking all constraints into
+    // consideration.
+    SUnit *SU = PickNodeToScheduleBottomUp();
+
+    AdvancePastStalls(SU);
+
+    ScheduleNodeBottomUp(SU);
+
+    while (AvailableQueue->empty() && !PendingQueue.empty()) {
+      // Advance the cycle to free resources. Skip ahead to the next ready SU.
+      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
+    }
+  }
+
+  // Reverse the order since it is bottom up.
+  std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+  VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//                RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+namespace {
+class RegReductionPQBase;
+
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+  SF &SortFunc;
+  reverse_sort(SF &sf) : SortFunc(sf) {}
+  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
+
+  bool operator()(SUnit* left, SUnit* right) const {
+    // reverse left/right rather than simply !SortFunc(left, right)
+    // to expose different paths in the comparison logic.
+    return SortFunc(right, left);
+  }
+};
+#endif // NDEBUG
+
+// bu_ls_rr_sort - Priority function for bottom up register pressure
+// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+  enum {
+    IsBottomUp = true,
+    HasReadyFilter = false
+  };
+
+  RegReductionPQBase *SPQ;
+  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+  bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+  enum {
+    IsBottomUp = true,
+    HasReadyFilter = false
+  };
+
+  RegReductionPQBase *SPQ;
+  src_ls_rr_sort(RegReductionPQBase *spq)
+    : SPQ(spq) {}
+  src_ls_rr_sort(const src_ls_rr_sort &RHS)
+    : SPQ(RHS.SPQ) {}
+
+  bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+  enum {
+    IsBottomUp = true,
+    HasReadyFilter = false
+  };
+
+  RegReductionPQBase *SPQ;
+  hybrid_ls_rr_sort(RegReductionPQBase *spq)
+    : SPQ(spq) {}
+  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+    : SPQ(RHS.SPQ) {}
+
+  bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+  bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
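+// Note on the convention shared by these sort functors (as used by
+// popFromQueueImpl below): operator()(left, right) returning true means left
+// ranks below right, i.e. right is popped and scheduled first, matching the
+// std::priority_queue comparator convention.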
+struct ilp_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + ilp_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; + + bool operator()(SUnit* left, SUnit* right) const; +}; + +class RegReductionPQBase : public SchedulingPriorityQueue { +protected: + std::vector<SUnit*> Queue; + unsigned CurQueueId; + bool TracksRegPressure; + bool SrcOrder; + + // SUnits - The SUnits for the current graph. + std::vector<SUnit> *SUnits; + + MachineFunction &MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector<unsigned> SethiUllmanNumbers; + + /// RegPressure - Tracking current reg pressure per register class. + /// + std::vector<unsigned> RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. + std::vector<unsigned> RegLimit; + +public: + RegReductionPQBase(MachineFunction &mf, + bool hasReadyFilter, + bool tracksrp, + bool srcorder, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : SchedulingPriorityQueue(hasReadyFilter), + CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + if (TracksRegPressure) { + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF); + } + } + + void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { + scheduleDAG = scheduleDag; + } + + ScheduleHazardRecognizer* getHazardRec() { + return scheduleDAG->getHazardRec(); + } + + void initNodes(std::vector<SUnit> &sunits); + + void addNode(const SUnit *SU); + + void updateNode(const SUnit *SU); + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + } + + unsigned getNodePriority(const SUnit *SU) const; + + unsigned getNodeOrdering(const SUnit *SU) const { + if (!SU->getNode()) return 0; + + return scheduleDAG->DAG->GetOrdering(SU->getNode()); + } + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + assert(!U->NodeQueueId && "Node in the queue already"); + U->NodeQueueId = ++CurQueueId; + Queue.push_back(U); + } + + void remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + assert(SU->NodeQueueId != 0 && "Not in queue!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), + SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); + SU->NodeQueueId = 0; + } + + bool tracksRegPressure() const { return TracksRegPressure; } + + void dumpRegPressure() const; + + bool HighRegPressure(const SUnit *SU) const; + + bool MayReduceRegPressure(SUnit *SU) const; + + int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; + + void scheduledNode(SUnit *SU); + + void unscheduledNode(SUnit *SU); + +protected: + bool canClobber(const SUnit *SU, const SUnit *Op); + void AddPseudoTwoAddrDeps(); + void PrescheduleNodesWithMultipleUses(); + void CalculateSethiUllmanNumbers(); 
+}; + +template<class SF> +static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; +} + +template<class SF> +SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { +#ifndef NDEBUG + if (DAG->StressSched) { + reverse_sort<SF> RPicker(Picker); + return popFromQueueImpl(Q, RPicker); + } +#endif + (void)DAG; + return popFromQueueImpl(Q, Picker); +} + +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { + SF Picker; + +public: + RegReductionPriorityQueue(MachineFunction &mf, + bool tracksrp, + bool srcorder, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, + tii, tri, tli), + Picker(this) {} + + bool isBottomUp() const { return SF::IsBottomUp; } + + bool isReady(SUnit *U) const { + return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); + } + + SUnit *pop() { + if (Queue.empty()) return NULL; + + SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); + V->NodeQueueId = 0; + return V; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump(ScheduleDAG *DAG) const { + // Emulate pop() without clobbering NodeQueueIds. + std::vector<SUnit*> DumpQueue = Queue; + SF DumpPicker = Picker; + while (!DumpQueue.empty()) { + SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); + dbgs() << "Height " << SU->getHeight() << ": "; + SU->dump(DAG); + } + } +#endif +}; + +typedef RegReductionPriorityQueue<bu_ls_rr_sort> +BURegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<src_ls_rr_sort> +SrcRegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> +HybridBURRPriorityQueue; + +typedef RegReductionPriorityQueue<ilp_ls_rr_sort> +ILPBURRPriorityQueue; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Static Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + +// Check for special nodes that bypass scheduling heuristics. +// Currently this pushes TokenFactor nodes down, but may be used for other +// pseudo-ops as well. +// +// Return -1 to schedule right above left, 1 for left above right. +// Return 0 if no bias exists. +static int checkSpecialNodes(const SUnit *left, const SUnit *right) { + bool LSchedLow = left->isScheduleLow; + bool RSchedLow = right->isScheduleLow; + if (LSchedLow != RSchedLow) + return LSchedLow < RSchedLow ? 1 : -1; + return 0; +} + +/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. +/// Smaller number is the higher priority. 
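+///
+/// A small worked example (informal, not from the original source): a leaf
+/// gets number 1. A node whose data preds carry numbers {3, 1} gets 3: the
+/// larger subtree is scheduled first, and a single register then holds its
+/// result while the smaller one is computed. Preds {2, 2} yield 2 + 1 = 3,
+/// since each additional pred that ties the maximum bumps Extra below.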
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+  if (SethiUllmanNumber != 0)
+    return SethiUllmanNumber;
+
+  unsigned Extra = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    SUnit *PredSU = I->getSUnit();
+    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+    if (PredSethiUllman > SethiUllmanNumber) {
+      SethiUllmanNumber = PredSethiUllman;
+      Extra = 0;
+    } else if (PredSethiUllman == SethiUllmanNumber)
+      ++Extra;
+  }
+
+  SethiUllmanNumber += Extra;
+
+  if (SethiUllmanNumber == 0)
+    SethiUllmanNumber = 1;
+
+  return SethiUllmanNumber;
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+  SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::addNode(const SUnit *SU) {
+  unsigned SUSize = SethiUllmanNumbers.size();
+  if (SUnits->size() > SUSize)
+    SethiUllmanNumbers.resize(SUSize*2, 0);
+  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+  SethiUllmanNumbers[SU->NodeNum] = 0;
+  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+  assert(SU->NodeNum < SethiUllmanNumbers.size());
+  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+    // CopyToReg should be close to its uses to facilitate coalescing and
+    // avoid spilling.
+    return 0;
+  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+      Opc == TargetOpcode::SUBREG_TO_REG ||
+      Opc == TargetOpcode::INSERT_SUBREG)
+    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+    // close to their uses to facilitate coalescing.
+    return 0;
+  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+    // If SU does not have a register use, i.e. it doesn't produce a value
+    // that would be consumed (e.g. store), then it terminates a chain of
+    // computation. Give it a large SethiUllman number so it will be
+    // scheduled right before its predecessors, so that it doesn't lengthen
+    // their live ranges.
+    return 0xffff;
+  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+    // If SU does not have a register def, schedule it close to its uses
+    // because it does not lengthen any live ranges.
+    return 0;
+#if 1
+  return SethiUllmanNumbers[SU->NodeNum];
+#else
+  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+  if (SU->isCallOp) {
+    // FIXME: This assumes all of the defs are used as call operands.
+    int NP = (int)Priority - SU->getNode()->getNumValues();
+    return (NP > 0) ? NP : 0;
+  }
+  return Priority;
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
+
+void RegReductionPQBase::dumpRegPressure() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+         E = TRI->regclass_end(); I != E; ++I) {
+    const TargetRegisterClass *RC = *I;
+    unsigned Id = RC->getID();
+    unsigned RP = RegPressure[Id];
+    if (!RP) continue;
+    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+          << '\n');
+  }
+#endif
+}
+
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+  if (!TLI)
+    return false;
+
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+    // to cover the number of registers defined (they are all live).
+    if (PredSU->NumRegDefsLeft == 0) {
+      continue;
+    }
+    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+         RegDefPos.IsValid(); RegDefPos.Advance()) {
+      unsigned RCId, Cost;
+      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+
+      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+        return true;
+    }
+  }
+  return false;
+}
+
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
+  const SDNode *N = SU->getNode();
+
+  if (!N->isMachineOpcode() || !SU->NumSuccs)
+    return false;
+
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  for (unsigned i = 0; i != NumDefs; ++i) {
+    MVT VT = N->getSimpleValueType(i);
+    if (!N->hasAnyUseOfValue(i))
+      continue;
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    if (RegPressure[RCId] >= RegLimit[RCId])
+      return true;
+  }
+  return false;
+}
+
+// Compute the register pressure contribution by this instruction by counting
+// up for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+  LiveUses = 0;
+  int PDiff = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+    // to cover the number of registers defined (they are all live).
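+    // For instance (hypothetical numbers): if PredSU defines two registers
+    // and both consuming uses have already been scheduled bottom-up, then
+    // NumRegDefsLeft is 0 and both values are live, so a further use reads
+    // already-live registers and is counted in LiveUses rather than PDiff.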
+    if (PredSU->NumRegDefsLeft == 0) {
+      if (PredSU->getNode()->isMachineOpcode())
+        ++LiveUses;
+      continue;
+    }
+    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+         RegDefPos.IsValid(); RegDefPos.Advance()) {
+      MVT VT = RegDefPos.GetValue();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      if (RegPressure[RCId] >= RegLimit[RCId])
+        ++PDiff;
+    }
+  }
+  const SDNode *N = SU->getNode();
+
+  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+    return PDiff;
+
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  for (unsigned i = 0; i != NumDefs; ++i) {
+    MVT VT = N->getSimpleValueType(i);
+    if (!N->hasAnyUseOfValue(i))
+      continue;
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    if (RegPressure[RCId] >= RegLimit[RCId])
+      --PDiff;
+  }
+  return PDiff;
+}
+
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
+  if (!TracksRegPressure)
+    return;
+
+  if (!SU->getNode())
+    return;
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+    // to cover the number of registers defined (they are all live).
+    if (PredSU->NumRegDefsLeft == 0) {
+      continue;
+    }
+    // FIXME: The ScheduleDAG currently loses information about which of a
+    // node's values is consumed by each dependence. Consequently, if the node
+    // defines multiple register classes, we don't know which to pressurize
+    // here. Instead the following loop consumes the register defs in an
+    // arbitrary order. At least it handles the common case of clustered loads
+    // to the same class. For precise liveness, each SDep needs to indicate the
+    // result number. But that tightly couples the ScheduleDAG with the
+    // SelectionDAG, making updates tricky. A simpler hack would be to attach a
+    // value type or register class to SDep.
+    //
+    // The most important aspect of register tracking is balancing the increase
+    // here with the reduction further below. Note that this SU may use multiple
+    // defs in PredSU. That can't be determined here, but we've already
+    // compensated by reducing NumRegDefsLeft in PredSU during
+    // ScheduleDAGSDNodes::AddSchedEdges.
+    --PredSU->NumRegDefsLeft;
+    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+      if (SkipRegDefs)
+        continue;
+
+      unsigned RCId, Cost;
+      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+      RegPressure[RCId] += Cost;
+      break;
+    }
+  }
+
+  // We should have this assert, but there may be dead SDNodes that never
+  // materialize as SUnits, so they don't appear to generate liveness.
+  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+  int SkipRegDefs = (int)SU->NumRegDefsLeft;
+  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+    if (SkipRegDefs > 0)
+      continue;
+    unsigned RCId, Cost;
+    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+    if (RegPressure[RCId] < Cost) {
+      // Register pressure tracking is imprecise. This can happen. But we try
+      // hard not to let it happen because it likely results in poor scheduling.
+      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
+      RegPressure[RCId] = 0;
+    }
+    else {
+      RegPressure[RCId] -= Cost;
+    }
+  }
+  dumpRegPressure();
+}
+
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
+  if (!TracksRegPressure)
+    return;
+
+  const SDNode *N = SU->getNode();
+  if (!N) return;
+
+  if (!N->isMachineOpcode()) {
+    if (N->getOpcode() != ISD::CopyToReg)
+      return;
+  } else {
+    unsigned Opc = N->getMachineOpcode();
+    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+        Opc == TargetOpcode::INSERT_SUBREG ||
+        Opc == TargetOpcode::SUBREG_TO_REG ||
+        Opc == TargetOpcode::REG_SEQUENCE ||
+        Opc == TargetOpcode::IMPLICIT_DEF)
+      return;
+  }
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+    // counts data deps.
+    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+      continue;
+    const SDNode *PN = PredSU->getNode();
+    if (!PN->isMachineOpcode()) {
+      if (PN->getOpcode() == ISD::CopyFromReg) {
+        MVT VT = PN->getSimpleValueType(0);
+        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+      }
+      continue;
+    }
+    unsigned POpc = PN->getMachineOpcode();
+    if (POpc == TargetOpcode::IMPLICIT_DEF)
+      continue;
+    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+        POpc == TargetOpcode::INSERT_SUBREG ||
+        POpc == TargetOpcode::SUBREG_TO_REG) {
+      MVT VT = PN->getSimpleValueType(0);
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+      continue;
+    }
+    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+    for (unsigned i = 0; i != NumDefs; ++i) {
+      MVT VT = PN->getSimpleValueType(i);
+      if (!PN->hasAnyUseOfValue(i))
+        continue;
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+        // Register pressure tracking is imprecise. This can happen.
+        RegPressure[RCId] = 0;
+      else
+        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+    }
+  }
+
+  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+  // may transfer data dependencies to CopyToReg.
+  if (SU->NumSuccs && N->isMachineOpcode()) {
+    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+      MVT VT = N->getSimpleValueType(i);
+      if (VT == MVT::Glue || VT == MVT::Other)
+        continue;
+      if (!N->hasAnyUseOfValue(i))
+        continue;
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+    }
+  }
+
+  dumpRegPressure();
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+  unsigned MaxHeight = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain succs
+    unsigned Height = I->getSUnit()->getHeight();
+    // If there are a bunch of CopyToRegs stacked up, they should be considered
+    // to be at the same position.
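+    // (Sketch: for  def -> CopyToReg -> use,  the recursion below derives
+    // the copy's position from its own closest successor, i.e. the real use,
+    // plus one, instead of trusting the copy's recorded height.)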
+    if (I->getSUnit()->getNode() &&
+        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+      Height = closestSucc(I->getSUnit())+1;
+    if (Height > MaxHeight)
+      MaxHeight = Height;
+  }
+  return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+  unsigned Scratches = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    Scratches++;
+  }
+  return Scratches;
+}
+
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+  bool RetVal = false;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;
+    const SUnit *PredSU = I->getSUnit();
+    if (PredSU->getNode() &&
+        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+      unsigned Reg =
+        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        RetVal = true;
+        continue;
+      }
+    }
+    return false;
+  }
+  return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+  bool RetVal = false;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;
+    const SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+      unsigned Reg =
+        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        RetVal = true;
+        continue;
+      }
+    }
+    return false;
+  }
+  return RetVal;
+}
+
+// Set isVRegCycle for a node with only live in opers and live out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+  if (DisableSchedVRegCycle)
+    return;
+
+  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+    return;
+
+  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+  SU->isVRegCycle = true;
+
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;
+    I->getSUnit()->isVRegCycle = true;
+  }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+  if (!SU->isVRegCycle)
+    return;
+
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    SUnit *PredSU = I->getSUnit();
+    if (PredSU->isVRegCycle) {
+      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+             "VRegCycle def must be CopyFromReg");
+      I->getSUnit()->isVRegCycle = 0;
+    }
+  }
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+  // If this SU also defines the VReg, don't hoist it as a "use".
+  if (SU->isVRegCycle)
+    return false;
+
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    if (I->getSUnit()->isVRegCycle &&
+        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
+      return true;
+    }
+  }
+  return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+  if ((int)SPQ->getCurCycle() < Height) return true;
+  if (SPQ->getHazardRec()->getHazardType(SU, 0)
+      != ScheduleHazardRecognizer::NoHazard)
+    return true;
+  return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+                            RegReductionPQBase *SPQ) {
+  // Scheduling an instruction that uses a VReg whose postincrement has not yet
+  // been scheduled will induce a copy. Model this as an extra cycle of latency.
+  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+  int LHeight = (int)left->getHeight() + LPenalty;
+  int RHeight = (int)right->getHeight() + RPenalty;
+
+  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
+                BUHasStall(left, LHeight, SPQ);
+  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
+                BUHasStall(right, RHeight, SPQ);
+
+  // If scheduling one of the nodes will cause a pipeline stall, delay it.
+  // If both will stall, sort them according to their height.
+  if (LStall) {
+    if (!RStall)
+      return 1;
+    if (LHeight != RHeight)
+      return LHeight > RHeight ? 1 : -1;
+  } else if (RStall)
+    return -1;
+
+  // If either node is scheduling for latency, sort them by height/depth
+  // and latency.
+  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+                     right->SchedulingPref == Sched::ILP)) {
+    // If neither instruction stalls (!LStall && !RStall) and the
+    // HazardRecognizer is enabled, grouping instructions by cycle, then height
+    // is already covered, so only depth matters. We also reach this point if
+    // both stall but have the same height.
+    if (!SPQ->getHazardRec()->isEnabled()) {
+      if (LHeight != RHeight)
+        return LHeight > RHeight ? 1 : -1;
+    }
+    int LDepth = left->getDepth() - LPenalty;
+    int RDepth = right->getDepth() - RPenalty;
+    if (LDepth != RDepth) {
+      DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
+            << ") depth " << LDepth << " vs SU (" << right->NodeNum
+            << ") depth " << RDepth << "\n");
+      return LDepth < RDepth ? 1 : -1;
+    }
+    if (left->Latency != right->Latency)
+      return left->Latency > right->Latency ? 1 : -1;
+  }
+  return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+  // Schedule physical register definitions close to their use. This is
+  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+  // long as shortening physreg live ranges is generally good, we can defer
+  // creating a subtarget hook.
+  if (!DisableSchedPhysRegJoin) {
+    bool LHasPhysReg = left->hasPhysRegDefs;
+    bool RHasPhysReg = right->hasPhysRegDefs;
+    if (LHasPhysReg != RHasPhysReg) {
+      #ifndef NDEBUG
+      const char *const PhysRegMsg[] = {" has no physreg", " defines a physreg"};
+      #endif
+      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
+            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
+            << PhysRegMsg[RHasPhysReg] << "\n");
+      return LHasPhysReg < RHasPhysReg;
+    }
+  }
+
+  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+
+  // Be really careful about hoisting call operands above previous calls.
+  // Only allow it if it would reduce register pressure.
+  if (left->isCall && right->isCallOp) {
+    unsigned RNumVals = right->getNode()->getNumValues();
+    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+  }
+  if (right->isCall && left->isCallOp) {
+    unsigned LNumVals = left->getNode()->getNumValues();
+    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+  }
+
+  if (LPriority != RPriority)
+    return LPriority > RPriority;
+
+  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
+  // the same, then keep source order.
+  if (left->isCall || right->isCall) {
+    unsigned LOrder = SPQ->getNodeOrdering(left);
+    unsigned ROrder = SPQ->getNodeOrdering(right);
+
+    // Prefer an ordering where the lower the non-zero order number, the higher
+    // the preference.
+    if ((LOrder || ROrder) && LOrder != ROrder)
+      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+  }
+
+  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+  // e.g.
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // and the following instructions are both ready.
+  // t2 = op c3
+  // t4 = op c4
+  //
+  // Then schedule t2 = op first.
+  // i.e.
+  // t4 = op c4
+  // t2 = op c3
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // This creates more short live intervals.
+  unsigned LDist = closestSucc(left);
+  unsigned RDist = closestSucc(right);
+  if (LDist != RDist)
+    return LDist < RDist;
+
+  // How many registers become live when the node is scheduled.
+  unsigned LScratch = calcMaxScratches(left);
+  unsigned RScratch = calcMaxScratches(right);
+  if (LScratch != RScratch)
+    return LScratch > RScratch;
+
+  // Comparing latency against a call makes little sense unless the node
+  // is register pressure-neutral.
+  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+    return (left->NodeQueueId > right->NodeQueueId);
+
+  // Do not compare latencies when one or both of the nodes are calls.
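+
+  // To recap the tie-break cascade above: physreg defs first, then
+  // Sethi-Ullman priority, then source order for calls, then distance to the
+  // closest successor, then scratch-register count; latency is the last
+  // heuristic consulted.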
+ if (!DisableSchedCycles && + !(left->isCall || right->isCall)) { + int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); + if (result != 0) + return result > 0; + } + else { + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + } + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +// Bottom up +bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + return BURRSort(left, right, SPQ); +} + +// Source order, otherwise bottom up. +bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + + return BURRSort(left, right, SPQ); +} + +// If the time between now and when the instruction will be ready can cover +// the spill code, then avoid adding it to the ready queue. This gives long +// stalls highest priority and allows hoisting across calls. It should also +// speed up processing the available queue. +bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + static const unsigned ReadyDelay = 3; + + if (SPQ->MayReduceRegPressure(SU)) return true; + + if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return true; +} + +// Return true if right should be scheduled with higher priority than left. +bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + + bool LHigh = SPQ->HighRegPressure(left); + bool RHigh = SPQ->HighRegPressure(right); + // Avoid causing spills. If register pressure is high, schedule for + // register pressure reduction. + if (LHigh && !RHigh) { + DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" + << right->NodeNum << ")\n"); + return true; + } + else if (!LHigh && RHigh) { + DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" + << left->NodeNum << ")\n"); + return false; + } + if (!LHigh && !RHigh) { + int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (result != 0) + return result > 0; + } + return BURRSort(left, right, SPQ); +} + +// Schedule as many instructions in each cycle as possible. So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + if (SU->getHeight() > CurCycle) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, 0) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return true; +} + +static bool canEnableCoalescing(SUnit *SU) { + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. 
+ return true; + + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return true; + + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return true; + + return false; +} + +// list-ilp is currently an experimental scheduler that allows various +// heuristics to be enabled prior to the normal register reduction logic. +bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + + unsigned LLiveUses = 0, RLiveUses = 0; + int LPDiff = 0, RPDiff = 0; + if (!DisableSchedRegPressure || !DisableSchedLiveUses) { + LPDiff = SPQ->RegPressureDiff(left, LLiveUses); + RPDiff = SPQ->RegPressureDiff(right, RLiveUses); + } + if (!DisableSchedRegPressure && LPDiff != RPDiff) { + DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff + << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); + return LPDiff > RPDiff; + } + + if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { + bool LReduce = canEnableCoalescing(left); + bool RReduce = canEnableCoalescing(right); + if (LReduce && !RReduce) return false; + if (RReduce && !LReduce) return true; + } + + if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { + DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses + << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); + return LLiveUses < RLiveUses; + } + + if (!DisableSchedStalls) { + bool LStall = BUHasStall(left, left->getHeight(), SPQ); + bool RStall = BUHasStall(right, right->getHeight(), SPQ); + if (LStall != RStall) + return left->getHeight() > right->getHeight(); + } + + if (!DisableSchedCriticalPath) { + int spread = (int)left->getDepth() - (int)right->getDepth(); + if (std::abs(spread) > MaxReorderWindow) { + DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " + << left->getDepth() << " != SU(" << right->NodeNum << "): " + << right->getDepth() << "\n"); + return left->getDepth() < right->getDepth(); + } + } + + if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { + int spread = (int)left->getHeight() - (int)right->getHeight(); + if (std::abs(spread) > MaxReorderWindow) + return left->getHeight() > right->getHeight(); + } + + return BURRSort(left, right, SPQ); +} + +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + if (!Disable2AddrHack) + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure && !SrcOrder) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + + // For single block loops, mark nodes that look like canonical IV increments. 
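+  // (Illustrative case, not from the source: in a single-block loop
+  // computing  i = i + 1,  the increment's only value operand is a
+  // CopyFromReg of i's vreg and its only use is a CopyToReg of the same
+  // vreg, so initVRegCycle marks it and its CopyFromReg as a VRegCycle.)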
+  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
+    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
+      initVRegCycle(&sunits[i]);
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
+  if (SU->isTwoAddress) {
+    unsigned Opc = SU->getNode()->getMachineOpcode();
+    const MCInstrDesc &MCID = TII->get(Opc);
+    unsigned NumRes = MCID.getNumDefs();
+    unsigned NumOps = MCID.getNumOperands() - NumRes;
+    for (unsigned i = 0; i != NumOps; ++i) {
+      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
+        SDNode *DU = SU->getNode()->getOperand(i).getNode();
+        if (DU->getNodeId() != -1 &&
+            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// canClobberReachingPhysRegUse - True if SU would clobber one of its
+/// successor's explicit physregs whose definition can reach DepSU.
+/// i.e. DepSU should not be scheduled above SU.
+static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
+                                         ScheduleDAGRRList *scheduleDAG,
+                                         const TargetInstrInfo *TII,
+                                         const TargetRegisterInfo *TRI) {
+  const uint16_t *ImpDefs
+    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
+  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+  if (!ImpDefs && !RegMask)
+    return false;
+
+  for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
+       SI != SE; ++SI) {
+    SUnit *SuccSU = SI->getSUnit();
+    for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
+           PE = SuccSU->Preds.end(); PI != PE; ++PI) {
+      if (!PI->isAssignedRegDep())
+        continue;
+
+      if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
+          scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+        return true;
+
+      if (ImpDefs)
+        for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+          // Return true if SU clobbers this physical register use and the
+          // definition of the register reaches from DepSU. IsReachable queries
+          // a topological forward sort of the DAG (following the successors).
+          if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
+              scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+            return true;
+    }
+  }
+  return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SDNode *N = SuccSU->getNode(); + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + assert(ImpDefs && "Caller should check hasPhysRegDefs"); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (!SUNode->isMachineOpcode()) + continue; + const uint16_t *SUImpDefs = + TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); + const uint32_t *SURegMask = getNodeRegMask(SUNode); + if (!SUImpDefs && !SURegMask) + continue; + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Glue || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned Reg = ImpDefs[i - NumDefs]; + if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) + return true; + if (!SUImpDefs) + continue; + for (;*SUImpDefs; ++SUImpDefs) { + unsigned SUReg = *SUImpDefs; + if (TRI->regsOverlap(Reg, SUReg)) + return true; + } + } + } + return false; +} + +/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses +/// are not handled well by the general register pressure reduction +/// heuristics. When presented with code like this: +/// +/// N +/// / | +/// / | +/// U store +/// | +/// ... +/// +/// the heuristics tend to push the store up, but since the +/// operand of the store has another use (U), this would increase +/// the length of that other use (the U->N edge). +/// +/// This function transforms code like the above to route U's +/// dependence through the store when possible, like this: +/// +/// N +/// || +/// || +/// store +/// | +/// U +/// | +/// ... +/// +/// This results in the store being scheduled immediately +/// after N, which shortens the U->N live range, reducing +/// register pressure. +/// +void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { + // Visit all the nodes in topological order, working top-down. + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + // For now, only look at nodes with no data successors, such as stores. + // These are especially important, due to the heuristics in + // getNodePriority for nodes with no data successors. + if (SU->NumSuccs != 0) + continue; + // For now, only look at nodes with exactly one data predecessor. + if (SU->NumPreds != 1) + continue; + // Avoid prescheduling copies to virtual registers, which don't behave + // like other nodes from the perspective of scheduling heuristics. + if (SDNode *N = SU->getNode()) + if (N->getOpcode() == ISD::CopyToReg && + TargetRegisterInfo::isVirtualRegister + (cast<RegisterSDNode>(N->getOperand(1))->getReg())) + continue; + + // Locate the single data predecessor. + SUnit *PredSU = 0; + for (SUnit::const_pred_iterator II = SU->Preds.begin(), + EE = SU->Preds.end(); II != EE; ++II) + if (!II->isCtrl()) { + PredSU = II->getSUnit(); + break; + } + assert(PredSU); + + // Don't rewrite edges that carry physregs, because that requires additional + // support infrastructure. + if (PredSU->hasPhysRegDefs) + continue; + // Short-circuit the case where SU is PredSU's only data successor. + if (PredSU->NumSuccs == 1) + continue; + // Avoid prescheduling to copies from virtual registers, which don't behave + // like other nodes from the perspective of scheduling heuristics. 
+    if (SDNode *N = SU->getNode())
+      if (N->getOpcode() == ISD::CopyFromReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Perform checks on the successors of PredSU.
+    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+           EE = PredSU->Succs.end(); II != EE; ++II) {
+      SUnit *PredSuccSU = II->getSUnit();
+      if (PredSuccSU == SU) continue;
+      // If PredSU has another successor with no data successors, for
+      // now don't attempt to choose either over the other.
+      if (PredSuccSU->NumSuccs == 0)
+        goto outer_loop_continue;
+      // Don't break physical register dependencies.
+      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+          goto outer_loop_continue;
+      // Don't introduce graph cycles.
+      if (scheduleDAG->IsReachable(SU, PredSuccSU))
+        goto outer_loop_continue;
+    }
+
+    // Ok, the transformation is safe and the heuristics suggest it is
+    // profitable. Update the graph.
+    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
+          << " next to PredSU #" << PredSU->NodeNum
+          << " to guide scheduling in the presence of multiple uses\n");
+    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+      SDep Edge = PredSU->Succs[i];
+      assert(!Edge.isAssignedRegDep());
+      SUnit *SuccSU = Edge.getSUnit();
+      if (SuccSU != SU) {
+        Edge.setSUnit(PredSU);
+        scheduleDAG->RemovePred(SuccSU, Edge);
+        scheduleDAG->AddPred(SU, Edge);
+        Edge.setSUnit(SU);
+        scheduleDAG->AddPred(SuccSU, Edge);
+        --i;
+      }
+    }
+  outer_loop_continue:;
+  }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = &(*SUnits)[i];
+    if (!SU->isTwoAddress)
+      continue;
+
+    SDNode *Node = SU->getNode();
+    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
+      continue;
+
+    bool isLiveOut = hasOnlyLiveOutUses(SU);
+    unsigned Opc = Node->getMachineOpcode();
+    const MCInstrDesc &MCID = TII->get(Opc);
+    unsigned NumRes = MCID.getNumDefs();
+    unsigned NumOps = MCID.getNumOperands() - NumRes;
+    for (unsigned j = 0; j != NumOps; ++j) {
+      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
+        continue;
+      SDNode *DU = SU->getNode()->getOperand(j).getNode();
+      if (DU->getNodeId() == -1)
+        continue;
+      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+      if (!DUSU) continue;
+      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+             E = DUSU->Succs.end(); I != E; ++I) {
+        if (I->isCtrl()) continue;
+        SUnit *SuccSU = I->getSUnit();
+        if (SuccSU == SU)
+          continue;
+        // Be conservative. Ignore if nodes aren't at roughly the same
+        // depth and height.
+        if (SuccSU->getHeight() < SU->getHeight() &&
+            (SU->getHeight() - SuccSU->getHeight()) > 1)
+          continue;
+        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+        // constrains whatever is using the copy, instead of the copy
+        // itself. In the case that the copy is coalesced, this
+        // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 && + SuccSU->getNode()->isMachineOpcode() && + SuccSU->getNode()->getMachineOpcode() == + TargetOpcode::COPY_TO_REGCLASS) + SuccSU = SuccSU->Succs.front().getSUnit(); + // Don't constrain non-instruction nodes. + if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) + continue; + // Don't constrain nodes with physical register defs if the + // predecessor can clobber them. + if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { + if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) + continue; + } + // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; + // these may be coalesced away. We want them close to their uses. + unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); + if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || + SuccOpc == TargetOpcode::INSERT_SUBREG || + SuccOpc == TargetOpcode::SUBREG_TO_REG) + continue; + if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && + (!canClobber(SuccSU, DUSU) || + (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || + (!SU->isCommutable && SuccSU->isCommutable)) && + !scheduleDAG->IsReachable(SuccSU, SU)) { + DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" + << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); + scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial)); + } + } + } + } +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + BURegReductionPriorityQueue *PQ = + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + SrcRegReductionPriorityQueue *PQ = + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetLowering *TLI = &IS->getTargetLowering(); + + HybridBURRPriorityQueue *PQ = + new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); + + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetLowering *TLI = &IS->getTargetLowering(); + + ILPBURRPriorityQueue *PQ = + new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); + 
PQ->setScheduleDAG(SD);
+  return SD;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 0000000..b22440d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,914 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGSDNodes class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+// This allows a latency-based scheduler to notice high latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+  "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+  cl::desc("Roughly estimate the number of cycles that 'long latency' "
+           "instructions take for targets with no itinerary"));
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+  : ScheduleDAG(mf), BB(0), DAG(0),
+    InstrItins(mf.getTarget().getInstrItineraryData()) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+  BB = bb;
+  DAG = dag;
+
+  // Clear the scheduler's SUnit DAG.
+  ScheduleDAG::clearDAG();
+  Sequence.clear();
+
+  // Invoke the target's selection of scheduler.
+  Schedule();
+}
+
+/// NewSUnit - Creates a new SUnit and returns a pointer to it.
+/// +SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { +#ifndef NDEBUG + const SUnit *Addr = 0; + if (!SUnits.empty()) + Addr = &SUnits[0]; +#endif + SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); + assert((Addr == 0 || Addr == &SUnits[0]) && + "SUnits std::vector reallocated on the fly!"); + SUnits.back().OrigNode = &SUnits.back(); + SUnit *SU = &SUnits.back(); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + if (!N || + (N->isMachineOpcode() && + N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)) + SU->SchedulingPref = Sched::None; + else + SU->SchedulingPref = TLI.getSchedulingPreference(N); + return SU; +} + +SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { + SUnit *SU = newSUnit(Old->getNode()); + SU->OrigNode = Old->OrigNode; + SU->Latency = Old->Latency; + SU->isVRegCycle = Old->isVRegCycle; + SU->isCall = Old->isCall; + SU->isCallOp = Old->isCallOp; + SU->isTwoAddress = Old->isTwoAddress; + SU->isCommutable = Old->isCommutable; + SU->hasPhysRegDefs = Old->hasPhysRegDefs; + SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->isScheduleHigh = Old->isScheduleHigh; + SU->isScheduleLow = Old->isScheduleLow; + SU->SchedulingPref = Old->SchedulingPref; + Old->isCloned = true; + return SU; +} + +/// CheckForPhysRegDependency - Check if the dependency between def and use of +/// a specified operand is a physical register dependency. If so, returns the +/// register and the cost of copying the register. +static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + unsigned &PhysReg, int &Cost) { + if (Op != 2 || User->getOpcode() != ISD::CopyToReg) + return; + + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return; + + unsigned ResNo = User->getOperand(2).getResNo(); + if (Def->isMachineOpcode()) { + const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); + if (ResNo >= II.getNumDefs() && + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { + PhysReg = Reg; + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo)); + Cost = RC->getCopyCost(); + } + } +} + +// Helper for AddGlue to clone node operands. +static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, + SmallVectorImpl<EVT> &VTs, + SDValue ExtraOper = SDValue()) { + SmallVector<SDValue, 4> Ops; + for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) + Ops.push_back(N->getOperand(I)); + + if (ExtraOper.getNode()) + Ops.push_back(ExtraOper); + + SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); + MachineSDNode::mmo_iterator Begin = 0, End = 0; + MachineSDNode *MN = dyn_cast<MachineSDNode>(N); + + // Store memory references. + if (MN) { + Begin = MN->memoperands_begin(); + End = MN->memoperands_end(); + } + + DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); + + // Reset the memory references + if (MN) + MN->setMemRefs(Begin, End); +} + +static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { + SmallVector<EVT, 4> VTs; + SDNode *GlueDestNode = Glue.getNode(); + + // Don't add glue from a node to itself. + if (GlueDestNode == N) return false; + + // Don't add a glue operand to something that already uses glue. + if (GlueDestNode && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { + return false; + } + // Don't add glue to something that already has a glue value. 
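+  // (Informal sketch of the gluing protocol used by ClusterNeighboringLoads
+  // below: AddGlue(L1, SDValue(), true, DAG) regrows L1 with a trailing
+  // MVT::Glue result; AddGlue(L2, SDValue(L1, last), ..., DAG) then appends
+  // that glue as L2's last operand, so the scheduler must keep L1 and L2
+  // adjacent and in order.)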
+  if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
+
+  for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+    VTs.push_back(N->getValueType(I));
+
+  if (AddGlue)
+    VTs.push_back(MVT::Glue);
+
+  CloneNodeWithValues(N, DAG, VTs, Glue);
+
+  return true;
+}
+
+// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
+// node even though simply shrinking the value list is sufficient.
+static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
+  assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
+          !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
+         "expected an unused glue value");
+
+  SmallVector<EVT, 4> VTs;
+  for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I)
+    VTs.push_back(N->getValueType(I));
+
+  CloneNodeWithValues(N, DAG, VTs);
+}
+
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
+/// This function finds loads of the same base and different offsets. If the
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
+/// outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+  SDNode *Chain = 0;
+  unsigned NumOps = Node->getNumOperands();
+  if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+    Chain = Node->getOperand(NumOps-1).getNode();
+  if (!Chain)
+    return;
+
+  // Look for other loads of the same chain. Find loads that are loading from
+  // the same base pointer and different offsets.
+  SmallPtrSet<SDNode*, 16> Visited;
+  SmallVector<int64_t, 4> Offsets;
+  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
+  bool Cluster = false;
+  SDNode *Base = Node;
+  for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+       I != E; ++I) {
+    SDNode *User = *I;
+    if (User == Node || !Visited.insert(User))
+      continue;
+    int64_t Offset1, Offset2;
+    if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+        Offset1 == Offset2)
+      // FIXME: Should be ok if their addresses are identical. But earlier
+      // optimizations really should have eliminated one of the loads.
+      continue;
+    if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+      Offsets.push_back(Offset1);
+    O2SMap.insert(std::make_pair(Offset2, User));
+    Offsets.push_back(Offset2);
+    if (Offset2 < Offset1)
+      Base = User;
+    Cluster = true;
+  }
+
+  if (!Cluster)
+    return;
+
+  // Sort them in increasing order.
+  std::sort(Offsets.begin(), Offsets.end());
+
+  // Check if the loads are close enough.
+  SmallVector<SDNode*, 4> Loads;
+  unsigned NumLoads = 0;
+  int64_t BaseOff = Offsets[0];
+  SDNode *BaseLoad = O2SMap[BaseOff];
+  Loads.push_back(BaseLoad);
+  for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+    int64_t Offset = Offsets[i];
+    SDNode *Load = O2SMap[Offset];
+    if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, NumLoads))
+      break; // Stop right here. Ignore loads that are further away.
+    Loads.push_back(Load);
+    ++NumLoads;
+  }
+
+  if (NumLoads == 0)
+    return;
+
+  // Cluster loads by adding MVT::Glue outputs and inputs. This also
+  // ensures they are scheduled in order of increasing addresses.
+  SDNode *Lead = Loads[0];
+  SDValue InGlue = SDValue(0, 0);
+  if (AddGlue(Lead, InGlue, true, DAG))
+    InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+  for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+    bool OutGlue = I < E - 1;
+    SDNode *Load = Loads[I];
+
+    // If AddGlue fails, we could leave an unused glue value. This should not
+    // cause any problems.
+    if (AddGlue(Load, InGlue, OutGlue, DAG)) {
+      if (OutGlue)
+        InGlue = SDValue(Load, Load->getNumValues() - 1);
+
+      ++LoadsClustered;
+    }
+    else if (!OutGlue && InGlue.getNode())
+      RemoveUnusedGlue(InGlue.getNode(), DAG);
+  }
+}
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI) {
+    SDNode *Node = &*NI;
+    if (!Node || !Node->isMachineOpcode())
+      continue;
+
+    unsigned Opc = Node->getMachineOpcode();
+    const MCInstrDesc &MCID = TII->get(Opc);
+    if (MCID.mayLoad())
+      // Cluster loads from "near" addresses into combined SUnits.
+      ClusterNeighboringLoads(Node);
+  }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+  // During scheduling, the NodeId field of SDNode is used to map SDNodes
+  // to their associated SUnits by holding SUnits table indices. A value
+  // of -1 means the SDNode does not yet have an associated SUnit.
+  unsigned NumNodes = 0;
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI) {
+    NI->setNodeId(-1);
+    ++NumNodes;
+  }
+
+  // Reserve entries in the vector for each of the SUnits we are creating. This
+  // ensures that reallocation of the vector won't happen, so SUnit*'s won't
+  // get invalidated.
+  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+  // This is a temporary workaround.
+  SUnits.reserve(NumNodes * 2);
+
+  // Add all nodes in depth first order.
+  SmallVector<SDNode*, 64> Worklist;
+  SmallPtrSet<SDNode*, 64> Visited;
+  Worklist.push_back(DAG->getRoot().getNode());
+  Visited.insert(DAG->getRoot().getNode());
+
+  SmallVector<SUnit*, 8> CallSUnits;
+  while (!Worklist.empty()) {
+    SDNode *NI = Worklist.pop_back_val();
+
+    // Add all operands to the worklist unless they've already been added.
+    for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
+      if (Visited.insert(NI->getOperand(i).getNode()))
+        Worklist.push_back(NI->getOperand(i).getNode());
+
+    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
+      continue;
+
+    // If this node has already been processed, stop now.
+    if (NI->getNodeId() != -1) continue;
+
+    SUnit *NodeSUnit = newSUnit(NI);
+
+    // See if anything is glued to this node, if so, add them to glued
+    // nodes. Nodes can have at most one glue input and one glue output. Glue
+    // is required to be the last operand and result of a node.
+
+    // Scan up to find glued preds.
+    SDNode *N = NI;
+    while (N->getNumOperands() &&
+           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+      N = N->getOperand(N->getNumOperands()-1).getNode();
+      assert(N->getNodeId() == -1 && "Node already inserted!");
+      N->setNodeId(NodeSUnit->NodeNum);
+      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+        NodeSUnit->isCall = true;
+    }
+
+    // Scan down to find any glued succs.
+    N = NI;
+    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+      SDValue GlueVal(N, N->getNumValues()-1);
+
+      // There are either zero or one users of the Glue result.
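+      // (Zero or one because glue is always the last result of its producer
+      // and is consumed, if at all, as the last operand of a single glued
+      // user; the DAG never fans glue out.)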
+ bool HasGlueUse = false; + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) + if (GlueVal.isOperandOf(*UI)) { + HasGlueUse = true; + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + N = *UI; + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; + break; + } + if (!HasGlueUse) break; + } + + if (NodeSUnit->isCall) + CallSUnits.push_back(NodeSUnit); + + // Schedule zero-latency TokenFactor below any nodes that may increase the + // schedule height. Otherwise, ancestors of the TokenFactor may appear to + // have false stalls. + if (NI->getOpcode() == ISD::TokenFactor) + NodeSUnit->isScheduleLow = true; + + // If there are glue operands involved, N is now the bottom-most node + // of the sequence of nodes that are glued together. + // Update the SUnit. + NodeSUnit->setNode(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + + // Compute NumRegDefsLeft. This must be done before AddSchedEdges. + InitNumRegDefsLeft(NodeSUnit); + + // Assign the Latency field of NodeSUnit using target-provided information. + computeLatency(NodeSUnit); + } + + // Find all call operands. + while (!CallSUnits.empty()) { + SUnit *SU = CallSUnits.pop_back_val(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->getOpcode() != ISD::CopyToReg) + continue; + SDNode *SrcN = SUNode->getOperand(2).getNode(); + if (isPassiveNode(SrcN)) continue; // Not scheduled. + SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; + SrcSU->isCallOp = true; + } + } +} + +void ScheduleDAGSDNodes::AddSchedEdges() { + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = forceUnitLatencies(); + + // Pass 2: add the preds, succs, etc. + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + SUnit *SU = &SUnits[su]; + SDNode *MainNode = SU->getNode(); + + if (MainNode->isMachineOpcode()) { + unsigned Opc = MainNode->getMachineOpcode(); + const MCInstrDesc &MCID = TII->get(Opc); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + SU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + SU->isCommutable = true; + } + + // Find all predecessors and successors of the group. + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { + if (N->isMachineOpcode() && + TII->get(N->getMachineOpcode()).getImplicitDefs()) { + SU->hasPhysRegClobbers = true; + unsigned NumUsed = InstrEmitter::CountResults(N); + while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) + --NumUsed; // Skip over unused values at the end. + if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) + SU->hasPhysRegDefs = true; + } + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDNode *OpN = N->getOperand(i).getNode(); + if (isPassiveNode(OpN)) continue; // Not scheduled. + SUnit *OpSU = &SUnits[OpN->getNodeId()]; + assert(OpSU && "Node has no SUnit!"); + if (OpSU == SU) continue; // In the same group. + + EVT OpVT = N->getOperand(i).getValueType(); + assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); + bool isChain = OpVT == MVT::Other; + + unsigned PhysReg = 0; + int Cost = 1; + // Determine if this is a physical register dependency. 
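+      // (As a schematic example with a made-up register name: a physical
+      // register dependency arises when the value flows through a physreg
+      // rather than a virtual register, e.g.
+      //
+      //   t3: i32,ch,glue = SomeCallLikeOp ...   // result lands in R0
+      //   t4: i32         = SomeUse t3           // reads R0; PhysReg != 0
+      //
+      // Cost models how expensive copying the value out of R0 would be.)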
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); + assert((PhysReg == 0 || !isChain) && + "Chain dependence via physreg data?"); + // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler + // emits a copy from the physical register to a virtual register unless + // it requires a cross class copy (cost < 0). That means we are only + // treating "expensive to copy" register dependency as physical register + // dependency. This may change in the future though. + if (Cost >= 0 && !StressSched) + PhysReg = 0; + + // If this is a ctrl dep, latency is 1. + unsigned OpLatency = isChain ? 1 : OpSU->Latency; + // Special-case TokenFactor chains as zero-latency. + if(isChain && OpN->getOpcode() == ISD::TokenFactor) + OpLatency = 0; + + SDep Dep = isChain ? SDep(OpSU, SDep::Barrier) + : SDep(OpSU, SDep::Data, PhysReg); + Dep.setLatency(OpLatency); + if (!isChain && !UnitLatencies) { + computeOperandLatency(OpN, N, i, Dep); + ST.adjustSchedDependency(OpSU, SU, Dep); + } + + if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { + // Multiple register uses are combined in the same SUnit. For example, + // we could have a set of glued nodes with all their defs consumed by + // another set of glued nodes. Register pressure tracking sees this as + // a single use, so to keep pressure balanced we reduce the defs. + // + // We can't tell (without more book-keeping) if this results from + // glued nodes or duplicate operands. As long as we don't reduce + // NumRegDefsLeft to zero, we handle the common cases well. + --OpSU->NumRegDefsLeft; + } + } + } + } +} + +/// BuildSchedGraph - Build the SUnit graph from the selection dag that we +/// are input. This SUnit graph is similar to the SelectionDAG, but +/// excludes nodes that aren't interesting to scheduling, and represents +/// glued together nodes with a single SUnit. +void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { + // Cluster certain nodes which should be scheduled together. + ClusterNodes(); + // Populate the SUnits array. + BuildSchedUnits(); + // Compute all the scheduling dependencies between nodes. + AddSchedEdges(); +} + +// Initialize NumNodeDefs for the current Node's opcode. +void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { + // Check for phys reg copy. + if (!Node) + return; + + if (!Node->isMachineOpcode()) { + if (Node->getOpcode() == ISD::CopyFromReg) + NodeNumDefs = 1; + else + NodeNumDefs = 0; + return; + } + unsigned POpc = Node->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) { + // No register need be allocated for this. + NodeNumDefs = 0; + return; + } + unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs(); + // Some instructions define regs that are not represented in the selection DAG + // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. + NodeNumDefs = std::min(Node->getNumValues(), NRegDefs); + DefIdx = 0; +} + +// Construct a RegDefIter for this SUnit and find the first valid value. +ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU, + const ScheduleDAGSDNodes *SD) + : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) { + InitNodeNumDefs(); + Advance(); +} + +// Advance to the next valid value defined by the SUnit. +void ScheduleDAGSDNodes::RegDefIter::Advance() { + for (;Node;) { // Visit all glued nodes. 
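+    // (For illustration with hypothetical nodes: if the SUnit's first node
+    // defines {value 0 (dead), value 1 (used)} and its glued node defines
+    // {value 0 (used)}, the iterator yields value 1 of the first node, then
+    // value 0 of the glued node, skipping the dead value entirely.)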
+ for (;DefIdx < NodeNumDefs; ++DefIdx) { + if (!Node->hasAnyUseOfValue(DefIdx)) + continue; + ValueType = Node->getSimpleValueType(DefIdx); + ++DefIdx; + return; // Found a normal regdef. + } + Node = Node->getGluedNode(); + if (Node == NULL) { + return; // No values left to visit. + } + InitNodeNumDefs(); + } +} + +void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { + assert(SU->NumRegDefsLeft == 0 && "expect a new node"); + for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { + assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected"); + ++SU->NumRegDefsLeft; + } +} + +void ScheduleDAGSDNodes::computeLatency(SUnit *SU) { + SDNode *N = SU->getNode(); + + // TokenFactor operands are considered zero latency, and some schedulers + // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero + // whenever node latency is nonzero. + if (N && N->getOpcode() == ISD::TokenFactor) { + SU->Latency = 0; + return; + } + + // Check to see if the scheduler cares about latencies. + if (forceUnitLatencies()) { + SU->Latency = 1; + return; + } + + if (!InstrItins || InstrItins->isEmpty()) { + if (N && N->isMachineOpcode() && + TII->isHighLatencyDef(N->getMachineOpcode())) + SU->Latency = HighLatencyCycles; + else + SU->Latency = 1; + return; + } + + // Compute the latency for the node. We use the sum of the latencies for + // all nodes glued together into this SUnit. + SU->Latency = 0; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) + SU->Latency += TII->getInstrLatency(InstrItins, N); +} + +void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, + unsigned OpIdx, SDep& dep) const{ + // Check to see if the scheduler cares about latencies. + if (forceUnitLatencies()) + return; + + if (dep.getKind() != SDep::Data) + return; + + unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); + if (Use->isMachineOpcode()) + // Adjust the use operand index by num of defs. + OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs(); + int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); + if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && + !BB->succ_empty()) { + unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + // This copy is a liveout value. It is likely coalesced, so reduce the + // latency so not to penalize the def. + // FIXME: need target specific adjustment here? + Latency = (Latency > 1) ? Latency - 1 : 1; + } + if (Latency >= 0) + dep.setLatency(Latency); +} + +void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + if (!SU->getNode()) { + dbgs() << "PHYS REG COPY\n"; + return; + } + + SU->getNode()->dump(DAG); + dbgs() << "\n"; + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + dbgs() << " "; + GluedNodes.back()->dump(DAG); + dbgs() << "\n"; + GluedNodes.pop_back(); + } +#endif +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGSDNodes::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} +#endif + +#ifndef NDEBUG +/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that +/// their state is consistent with the nodes listed in Sequence. 
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+  unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+  unsigned Noops = 0;
+  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+    if (!Sequence[i])
+      ++Noops;
+  assert(Sequence.size() - Noops == ScheduledNodes &&
+         "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
+namespace {
+  struct OrderSorter {
+    bool operator()(const std::pair<unsigned, MachineInstr*> &A,
+                    const std::pair<unsigned, MachineInstr*> &B) {
+      return A.first < B.first;
+    }
+  };
+}
+
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+                               InstrEmitter &Emitter,
+                    SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+                               DenseMap<SDValue, unsigned> &VRBaseMap,
+                               unsigned Order) {
+  if (!N->getHasDebugValue())
+    return;
+
+  // Opportunistically insert immediate dbg_value uses, i.e. those with a
+  // source order number right after the N.
+  MachineBasicBlock *BB = Emitter.getBlock();
+  MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+  ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
+  for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
+    if (DVs[i]->isInvalidated())
+      continue;
+    unsigned DVOrder = DVs[i]->getOrder();
+    if (!Order || DVOrder == ++Order) {
+      MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+      if (DbgMI) {
+        Orders.push_back(std::make_pair(DVOrder, DbgMI));
+        BB->insert(InsertPos, DbgMI);
+      }
+      DVs[i]->setIsInvalidated();
+    }
+  }
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+                              InstrEmitter &Emitter,
+                              DenseMap<SDValue, unsigned> &VRBaseMap,
+                    SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+                              SmallSet<unsigned, 8> &Seen) {
+  unsigned Order = DAG->GetOrdering(N);
+  if (!Order || !Seen.insert(Order)) {
+    // Process any valid SDDbgValues even if the node does not have an order
+    // assigned.
+    ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+    return;
+  }
+
+  MachineBasicBlock *BB = Emitter.getBlock();
+  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+    // Did not insert any instruction.
+    Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+    return;
+  }
+
+  Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+  ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+                MachineBasicBlock::iterator InsertPos) {
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    if (I->getSUnit()->CopyDstRC) {
+      // Copy to physical register.
+      DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+      // Find the destination physical register.
+      unsigned Reg = 0;
+      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+             EE = SU->Succs.end(); II != EE; ++II) {
+        if (II->isCtrl()) continue;  // ignore chain succs
+        if (II->getReg()) {
+          Reg = II->getReg();
+          break;
+        }
+      }
+      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+        .addReg(VRI->second);
+    } else {
+      // Copy from physical register.
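+      // (Schematically, and not as literal emitted code: the branch above
+      // produces "COPY %physreg, %vregN" using a destination taken from a
+      // successor edge, while this branch produces "COPY %vregM, %physreg"
+      // and records the new virtual register in VRBaseMap; the register
+      // names here are placeholders.)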
+ assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); + } + break; + } +} + +/// EmitSchedule - Emit the machine code in scheduled order. Return the new +/// InsertPos and MachineBasicBlock that contains this insertion +/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does +/// not necessarily refer to returned BB. The emitter may split blocks. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(MachineBasicBlock::iterator &InsertPos) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + DenseMap<SUnit*, unsigned> CopyVRBaseMap; + SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; + SmallSet<unsigned, 8> Seen; + bool HasDbg = DAG->hasDebugValues(); + + // If this is the first BB, emit byval parameter dbg_value's. + if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) { + SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin(); + SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd(); + for (; PDI != PDE; ++PDI) { + MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); + if (DbgMI) + BB->insert(InsertPos, DbgMI); + } + } + + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + TII->insertNoop(*Emitter.getBlock(), InsertPos); + continue; + } + + // For pre-regalloc scheduling, create instructions corresponding to the + // SDNode and any glued SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. + EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); + continue; + } + + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + SDNode *N = GluedNodes.back(); + Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap); + // Remember the source order of the inserted instruction. + if (HasDbg) + ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); + GluedNodes.pop_back(); + } + Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap); + // Remember the source order of the inserted instruction. + if (HasDbg) + ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, + Seen); + } + + // Insert all the dbg_values which have not already been inserted in source + // order sequence. + if (HasDbg) { + MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); + + // Sort the source order instructions and use the order to insert debug + // values. + std::sort(Orders.begin(), Orders.end(), OrderSorter()); + + SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); + SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); + // Now emit the rest according to source order. + unsigned LastOrder = 0; + for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { + unsigned Order = Orders[i].first; + MachineInstr *MI = Orders[i].second; + // Insert all SDDbgValue's whose order(s) are before "Order". 
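+      // (For example, with made-up source orders: if instructions were
+      // recorded at orders 10 and 20, a dbg_value carrying order 15 is
+      // emitted while visiting the order-20 entry and is placed right after
+      // the order-10 instruction; LastOrder then advances from 10 to 20.)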
+ if (!MI) + continue; + for (; DI != DE && + (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { + if ((*DI)->isInvalidated()) + continue; + MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); + if (DbgMI) { + if (!LastOrder) + // Insert to start of the BB (after PHIs). + BB->insert(BBBegin, DbgMI); + else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. + MachineBasicBlock::iterator Pos = MI; + MI->getParent()->insert(llvm::next(Pos), DbgMI); + } + } + } + LastOrder = Order; + } + // Add trailing DbgValue's before the terminator. FIXME: May want to add + // some of them before one or more conditional branches? + SmallVector<MachineInstr*, 8> DbgMIs; + while (DI != DE) { + if (!(*DI)->isInvalidated()) + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); + ++DI; + } + + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); + InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); + } + + InsertPos = Emitter.getInsertPos(); + return Emitter.getBlock(); +} + +/// Return the basic block label. +std::string ScheduleDAGSDNodes::getDAGName() const { + return "sunit-dag." + BB->getFullName(); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h new file mode 100644 index 0000000..2ff37e0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -0,0 +1,185 @@ +//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGSDNodes class, which implements +// scheduling for an SDNode-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef SCHEDULEDAGSDNODES_H +#define SCHEDULEDAGSDNODES_H + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/ScheduleDAG.h" + +namespace llvm { + /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. + /// + /// Edges between SUnits are initially based on edges in the SelectionDAG, + /// and additional edges can be added by the schedulers as heuristics. + /// SDNodes such as Constants, Registers, and a few others that are not + /// interesting to schedulers are not allocated SUnits. + /// + /// SDNodes with MVT::Glue operands are grouped along with the flagged + /// nodes into a single SUnit so that they are scheduled together. + /// + /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output + /// edges. Physical register dependence information is not carried in + /// the DAG and must be handled explicitly by schedulers. + /// + class ScheduleDAGSDNodes : public ScheduleDAG { + public: + MachineBasicBlock *BB; + SelectionDAG *DAG; // DAG of the current basic block + const InstrItineraryData *InstrItins; + + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + + explicit ScheduleDAGSDNodes(MachineFunction &mf); + + virtual ~ScheduleDAGSDNodes() {} + + /// Run - perform scheduling. + /// + void Run(SelectionDAG *dag, MachineBasicBlock *bb); + + /// isPassiveNode - Return true if the node is a non-scheduled leaf. 
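+    /// For example, in a hypothetical DAG such as
+    ///   t5: i32 = add t4, Constant:i32<1>
+    /// the Constant operand is materialized directly by the instruction
+    /// emitter, so only the add is given an SUnit and scheduled.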
+ /// + static bool isPassiveNode(SDNode *Node) { + if (isa<ConstantSDNode>(Node)) return true; + if (isa<ConstantFPSDNode>(Node)) return true; + if (isa<RegisterSDNode>(Node)) return true; + if (isa<RegisterMaskSDNode>(Node)) return true; + if (isa<GlobalAddressSDNode>(Node)) return true; + if (isa<BasicBlockSDNode>(Node)) return true; + if (isa<FrameIndexSDNode>(Node)) return true; + if (isa<ConstantPoolSDNode>(Node)) return true; + if (isa<TargetIndexSDNode>(Node)) return true; + if (isa<JumpTableSDNode>(Node)) return true; + if (isa<ExternalSymbolSDNode>(Node)) return true; + if (isa<BlockAddressSDNode>(Node)) return true; + if (Node->getOpcode() == ISD::EntryToken || + isa<MDNodeSDNode>(Node)) return true; + return false; + } + + /// NewSUnit - Creates a new SUnit and return a ptr to it. + /// + SUnit *newSUnit(SDNode *N); + + /// Clone - Creates a clone of the specified SUnit. It does not copy the + /// predecessors / successors info nor the temporary scheduling states. + /// + SUnit *Clone(SUnit *N); + + /// BuildSchedGraph - Build the SUnit graph from the selection dag that we + /// are input. This SUnit graph is similar to the SelectionDAG, but + /// excludes nodes that aren't interesting to scheduling, and represents + /// flagged together nodes with a single SUnit. + void BuildSchedGraph(AliasAnalysis *AA); + + /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is + /// CopyToReg and its only active data operands are CopyFromReg within a + /// single block loop. + /// + void InitVRegCycleFlag(SUnit *SU); + + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. + /// + void InitNumRegDefsLeft(SUnit *SU); + + /// computeLatency - Compute node latency. + /// + virtual void computeLatency(SUnit *SU); + + virtual void computeOperandLatency(SDNode *Def, SDNode *Use, + unsigned OpIdx, SDep& dep) const; + + /// Schedule - Order nodes according to selected style, filling + /// in the Sequence member. + /// + virtual void Schedule() = 0; + + /// VerifyScheduledSequence - Verify that all SUnits are scheduled and + /// consistent with the Sequence of scheduled instructions. + void VerifyScheduledSequence(bool isBottomUp); + + /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock + /// according to the order specified in Sequence. + /// + virtual MachineBasicBlock* + EmitSchedule(MachineBasicBlock::iterator &InsertPos); + + virtual void dumpNode(const SUnit *SU) const; + + void dumpSchedule() const; + + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + + virtual std::string getDAGName() const; + + virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; + + /// RegDefIter - In place iteration over the values defined by an + /// SUnit. This does not need copies of the iterator or any other STLisms. + /// The iterator creates itself, rather than being provided by the SchedDAG. + class RegDefIter { + const ScheduleDAGSDNodes *SchedDAG; + const SDNode *Node; + unsigned DefIdx; + unsigned NodeNumDefs; + MVT ValueType; + public: + RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); + + bool IsValid() const { return Node != NULL; } + + MVT GetValue() const { + assert(IsValid() && "bad iterator"); + return ValueType; + } + + const SDNode *GetNode() const { + return Node; + } + + unsigned GetIdx() const { + return DefIdx-1; + } + + void Advance(); + private: + void InitNodeNumDefs(); + }; + + protected: + /// ForceUnitLatencies - Return true if all scheduling edges should be given + /// a latency value of one. 
The default is to return false; schedulers may
+    /// override this as needed.
+    virtual bool forceUnitLatencies() const { return false; }
+
+  private:
+    /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+    /// combined SUnits.
+    void ClusterNeighboringLoads(SDNode *Node);
+    /// ClusterNodes - Cluster certain nodes which should be scheduled
+    /// together.
+    void ClusterNodes();
+
+    /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+    void BuildSchedUnits();
+    void AddSchedEdges();
+
+    void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+                         MachineBasicBlock::iterator InsertPos);
+  };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
new file mode 100644
index 0000000..58aa1fe
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -0,0 +1,278 @@
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+  VLIWScheduler("vliw-td", "VLIW scheduler",
+                createVLIWDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports top-down scheduling.
+///
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
+private:
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  ///
+  SchedulingPriorityQueue *AvailableQueue;
+
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+  /// added to the AvailableQueue.
+  std::vector<SUnit*> PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use.
+  ScheduleHazardRecognizer *HazardRec;
+
+  /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA; + +public: + ScheduleDAGVLIW(MachineFunction &mf, + AliasAnalysis *aa, + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { + + const TargetMachine &tm = mf.getTarget(); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } + + ~ScheduleDAGVLIW() { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule(); + +private: + void releaseSucc(SUnit *SU, const SDep &D); + void releaseSuccessors(SUnit *SU); + void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void listScheduleTopDown(); +}; +} // end anonymous namespace + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGVLIW::Schedule() { + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " '" << BB->getName() << "' **********\n"); + + // Build the scheduling graph. + BuildSchedGraph(AA); + + AvailableQueue->initNodes(SUnits); + + listScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { + SUnit *SuccSU = D.getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + assert(!D.isWeak() && "unexpected artificial DAG edge"); + + --SuccSU->NumPredsLeft; + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { + PendingQueue.push_back(SuccSU); + } +} + +void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { + // Top down: release successors. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + releaseSucc(SU, *I); + } +} + +/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + releaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->scheduledNode(SU); +} + +/// listScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGVLIW::listScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + releaseSuccessors(&EntrySU); + + // All leaves to AvailableQueue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While AvailableQueue is not empty, grab the node with the highest + // priority. If it is not ready put it back. 
Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } + else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + // Reset DFA state. + AvailableQueue->scheduledNode(0); + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = 0; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. + if (FoundSUnit) { + scheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. + DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DEBUG(dbgs() << "*** Emitting noop\n"); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifyScheduledSequence(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createVLIWDAGScheduler - This creates a top-down list scheduler. +ScheduleDAGSDNodes * +llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp new file mode 100644 index 0000000..6424431 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -0,0 +1,6382 @@ +//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "SDNodeDbgValue.h" +#include "SDNodeOrdering.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" +#include <algorithm> +#include <cmath> +using namespace llvm; + +/// makeVTList - Return an instance of the SDVTList struct initialized with the +/// specified members. +static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { + SDVTList Res = {VTs, NumVTs}; + return Res; +} + +// Default null implementations of the callbacks. +void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} +void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} + +//===----------------------------------------------------------------------===// +// ConstantFPSDNode Class +//===----------------------------------------------------------------------===// + +/// isExactlyValue - We don't rely on operator== working on double values, as +/// it returns true for things that are clearly not equal, like -0.0 and 0.0. +/// As such, this method can be used to do an exact bit-for-bit comparison of +/// two floating point values. +bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { + return getValueAPF().bitwiseIsEqual(V); +} + +bool ConstantFPSDNode::isValueValidForType(EVT VT, + const APFloat& Val) { + assert(VT.isFloatingPoint() && "Can only convert between FP types"); + + // convert modifies in place, so make a copy. + APFloat Val2 = APFloat(Val); + bool losesInfo; + (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, + &losesInfo); + return !losesInfo; +} + +//===----------------------------------------------------------------------===// +// ISD Namespace +//===----------------------------------------------------------------------===// + +/// isBuildVectorAllOnes - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are ~0 or undef. +bool ISD::isBuildVectorAllOnes(const SDNode *N) { + // Look through a bit convert. 
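+  // For instance (hypothetical types), (v2i32 (bitcast (v1i64 build_vector
+  // -1))) is still recognized as all-ones: after stripping the bitcast, the
+  // checks below run on the underlying v1i64 build_vector.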
+ if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target and + // a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all ones, not whether the individual + // constants are. + SDValue NotZero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) { + if (CN->getAPIntValue().countTrailingOnes() < EltSize) + return false; + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize) + return false; + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. Even with the above element type twiddling, this should be OK, as + // the same type legalization should have applied to all the elements. + for (++i; i != e; ++i) + if (N->getOperand(i) != NotZero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + + +/// isBuildVectorAllZeros - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are 0 or undef. +bool ISD::isBuildVectorAllZeros(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-0 + // elements. + SDValue Zero = N->getOperand(i); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { + if (!CN->isNullValue()) + return false; + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { + if (!CFPN->getValueAPF().isPosZero()) + return false; + } else + return false; + + // Okay, we have at least one 0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != Zero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + +/// isScalarToVector - Return true if the specified node is a +/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low +/// element is not an undef. 
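+/// For example (hypothetical nodes): (v4f32 build_vector x, undef, undef,
+/// undef) qualifies, while (v4f32 build_vector x, y, undef, undef) and
+/// (v4f32 build_vector undef, x, undef, undef) do not.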
+bool ISD::isScalarToVector(const SDNode *N) { + if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) + return true; + + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + if (N->getOperand(0).getOpcode() == ISD::UNDEF) + return false; + unsigned NumElems = N->getNumOperands(); + if (NumElems == 1) + return false; + for (unsigned i = 1; i < NumElems; ++i) { + SDValue V = N->getOperand(i); + if (V.getOpcode() != ISD::UNDEF) + return false; + } + return true; +} + +/// allOperandsUndef - Return true if the node has at least one operand +/// and all operands of the specified node are ISD::UNDEF. +bool ISD::allOperandsUndef(const SDNode *N) { + // Return false if the node has no operands. + // This is "logically inconsistent" with the definition of "all" but + // is probably the desired behavior. + if (N->getNumOperands() == 0) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e ; ++i) + if (N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + + return true; +} + +/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) +/// when given the operation for (X op Y). +ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { + // To perform this operation, we just need to swap the L and G bits of the + // operation. + unsigned OldL = (Operation >> 2) & 1; + unsigned OldG = (Operation >> 1) & 1; + return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits + (OldL << 1) | // New G bit + (OldG << 2)); // New L bit. +} + +/// getSetCCInverse - Return the operation corresponding to !(X op Y), where +/// 'op' is a valid SetCC operation. +ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { + unsigned Operation = Op; + if (isInteger) + Operation ^= 7; // Flip L, G, E bits, but not U. + else + Operation ^= 15; // Flip all of the condition bits. + + if (Operation > ISD::SETTRUE2) + Operation &= ~8; // Don't let N and U bits get set. + + return ISD::CondCode(Operation); +} + + +/// isSignedOp - For an integer comparison, return 1 if the comparison is a +/// signed operation and 2 if the result is an unsigned comparison. Return zero +/// if the operation does not depend on the sign of the input (setne and seteq). +static int isSignedOp(ISD::CondCode Opcode) { + switch (Opcode) { + default: llvm_unreachable("Illegal integer setcc operation!"); + case ISD::SETEQ: + case ISD::SETNE: return 0; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return 1; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return 2; + } +} + +/// getSetCCOrOperation - Return the result of a logical OR between different +/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function +/// returns SETCC_INVALID if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed integer setcc with an unsigned integer setcc. + return ISD::SETCC_INVALID; + + unsigned Op = Op1 | Op2; // Combine all of the condition bits. + + // If the N and U bits get set then the resultant comparison DOES suddenly + // care about orderedness, and is true when ordered. + if (Op > ISD::SETTRUE2) + Op &= ~16; // Clear the U bit if the N bit is set. + + // Canonicalize illegal integer setcc's. + if (isInteger && Op == ISD::SETUNE) // e.g. 
SETUGT | SETULT + Op = ISD::SETNE; + + return ISD::CondCode(Op); +} + +/// getSetCCAndOperation - Return the result of a logical AND between different +/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This +/// function returns zero if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed setcc with an unsigned setcc. + return ISD::SETCC_INVALID; + + // Combine all of the condition bits. + ISD::CondCode Result = ISD::CondCode(Op1 & Op2); + + // Canonicalize illegal integer setcc's. + if (isInteger) { + switch (Result) { + default: break; + case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT + case ISD::SETOEQ: // SETEQ & SETU[LG]E + case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE + case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE + case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE + } + } + + return Result; +} + +//===----------------------------------------------------------------------===// +// SDNode Profile Support +//===----------------------------------------------------------------------===// + +/// AddNodeIDOpcode - Add the node opcode to the NodeID data. +/// +static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { + ID.AddInteger(OpC); +} + +/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them +/// solely with their pointer. +static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { + ID.AddPointer(VTList.VTs); +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDValue *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDUse *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +static void AddNodeIDNode(FoldingSetNodeID &ID, + unsigned short OpC, SDVTList VTList, + const SDValue *OpList, unsigned N) { + AddNodeIDOpcode(ID, OpC); + AddNodeIDValueTypes(ID, VTList); + AddNodeIDOperands(ID, OpList, N); +} + +/// AddNodeIDCustom - If this is an SDNode with special info, add this info to +/// the NodeID data. +static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { + switch (N->getOpcode()) { + case ISD::TargetExternalSymbol: + case ISD::ExternalSymbol: + llvm_unreachable("Should only be used on nodes with operands"); + default: break; // Normal nodes don't need extra info. 
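+  // (A schematic sketch of why this extra payload matters, with GA standing
+  // for a hypothetical GlobalAddressSDNode*: two GlobalAddress nodes that
+  // differ only in offset must not CSE to one node, so the offset is folded
+  // into the ID along with the opcode, value types and operands:
+  //
+  //   FoldingSetNodeID ID;
+  //   AddNodeIDNode(ID, GA->getOpcode(), GA->getVTList(), 0, 0);
+  //   ID.AddPointer(GA->getGlobal());
+  //   ID.AddInteger(GA->getOffset());  // distinguishes &g from &g+4
+  //
+  // The cases below add exactly this kind of per-node payload.)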
+ case ISD::TargetConstant: + case ISD::Constant: + ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue()); + break; + case ISD::TargetConstantFP: + case ISD::ConstantFP: { + ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); + break; + } + case ISD::TargetGlobalAddress: + case ISD::GlobalAddress: + case ISD::TargetGlobalTLSAddress: + case ISD::GlobalTLSAddress: { + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + ID.AddPointer(GA->getGlobal()); + ID.AddInteger(GA->getOffset()); + ID.AddInteger(GA->getTargetFlags()); + ID.AddInteger(GA->getAddressSpace()); + break; + } + case ISD::BasicBlock: + ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock()); + break; + case ISD::Register: + ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); + break; + case ISD::RegisterMask: + ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); + break; + case ISD::SRCVALUE: + ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); + break; + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); + break; + case ISD::JumpTable: + case ISD::TargetJumpTable: + ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); + ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags()); + break; + case ISD::ConstantPool: + case ISD::TargetConstantPool: { + const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); + ID.AddInteger(CP->getAlignment()); + ID.AddInteger(CP->getOffset()); + if (CP->isMachineConstantPoolEntry()) + CP->getMachineCPVal()->addSelectionDAGCSEId(ID); + else + ID.AddPointer(CP->getConstVal()); + ID.AddInteger(CP->getTargetFlags()); + break; + } + case ISD::TargetIndex: { + const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N); + ID.AddInteger(TI->getIndex()); + ID.AddInteger(TI->getOffset()); + ID.AddInteger(TI->getTargetFlags()); + break; + } + case ISD::LOAD: { + const LoadSDNode *LD = cast<LoadSDNode>(N); + ID.AddInteger(LD->getMemoryVT().getRawBits()); + ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::STORE: { + const StoreSDNode *ST = cast<StoreSDNode>(N); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + break; + } + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_STORE: { + const AtomicSDNode *AT = cast<AtomicSDNode>(N); + ID.AddInteger(AT->getMemoryVT().getRawBits()); + ID.AddInteger(AT->getRawSubclassData()); + ID.AddInteger(AT->getPointerInfo().getAddrSpace()); + break; + } + case ISD::PREFETCH: { + const MemSDNode *PF = cast<MemSDNode>(N); + ID.AddInteger(PF->getPointerInfo().getAddrSpace()); + break; + } + case ISD::VECTOR_SHUFFLE: { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); + i != e; ++i) + ID.AddInteger(SVN->getMaskElt(i)); + break; + } + case ISD::TargetBlockAddress: + case ISD::BlockAddress: { + const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N); + ID.AddPointer(BA->getBlockAddress()); + ID.AddInteger(BA->getOffset()); + ID.AddInteger(BA->getTargetFlags()); + break; + } + } // end switch 
(N->getOpcode())
+
+  // Target specific memory nodes could also have address spaces to check.
+  if (N->isTargetMemoryOpcode())
+    ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+  AddNodeIDOpcode(ID, N->getOpcode());
+  // Add the return value info.
+  AddNodeIDValueTypes(ID, N->getVTList());
+  // Add the operand info.
+  AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+  AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries volatility, temporalness, indexing mode, and
+/// extension/truncation information.
+///
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
+                     bool isNonTemporal, bool isInvariant) {
+  assert((ConvType & 3) == ConvType &&
+         "ConvType may not require more than 2 bits!");
+  assert((AM & 7) == AM &&
+         "AM may not require more than 3 bits!");
+  return ConvType |
+         (AM << 2) |
+         (isVolatile << 5) |
+         (isNonTemporal << 6) |
+         (isInvariant << 7);
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+  if (N->getValueType(0) == MVT::Glue)
+    return true; // Never CSE anything that produces a flag.
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::HANDLENODE:
+  case ISD::EH_LABEL:
+    return true; // Never CSE these nodes.
+  }
+
+  // Check that remaining values produced are not flags.
+  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Glue)
+      return true; // Never CSE anything that produces a flag.
+
+  return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+  // Create a dummy node (which is not added to allnodes) that adds a reference
+  // to the root node, preventing it from being deleted.
+  HandleSDNode Dummy(getRoot());
+
+  SmallVector<SDNode*, 128> DeadNodes;
+
+  // Add all obviously-dead nodes to the DeadNodes worklist.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I)
+    if (I->use_empty())
+      DeadNodes.push_back(I);
+
+  RemoveDeadNodes(DeadNodes);
+
+  // If the root changed (e.g. it was a dead load), update the root.
+  setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
+
+  // Process the worklist, deleting the nodes and adding their uses to the
+  // worklist.
+  while (!DeadNodes.empty()) {
+    SDNode *N = DeadNodes.pop_back_val();
+
+    for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+      DUL->NodeDeleted(N, 0);
+
+    // Take the node out of the appropriate CSE map.
+    RemoveNodeFromCSEMaps(N);
+
+    // Next, brutally remove the operand list. This is safe to do, as there are
+    // no cycles in the graph.
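+    // (For example, with hypothetical nodes: if N = (add t1, t2) is dead and
+    // N was t1's only use, clearing N's operands makes t1 use_empty(), so t1
+    // joins the worklist and is deleted on a later iteration, while t2
+    // survives as long as it still has other uses.)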
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Operand = Use.getNode(); + Use.set(SDValue()); + + // Now that we removed this operand, see if there are no uses of it left. + if (Operand->use_empty()) + DeadNodes.push_back(Operand); + } + + DeallocateNode(N); + } +} + +void SelectionDAG::RemoveDeadNode(SDNode *N){ + SmallVector<SDNode*, 16> DeadNodes(1, N); + + // Create a dummy node that adds a reference to the root node, preventing + // it from being deleted. (This matters if the root is an operand of the + // dead node.) + HandleSDNode Dummy(getRoot()); + + RemoveDeadNodes(DeadNodes); +} + +void SelectionDAG::DeleteNode(SDNode *N) { + // First take this out of the appropriate CSE map. + RemoveNodeFromCSEMaps(N); + + // Finally, remove uses due to operands of this node, remove from the + // AllNodes list, and delete the node. + DeleteNodeNotInCSEMaps(N); +} + +void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { + assert(N != AllNodes.begin() && "Cannot delete the entry node!"); + assert(N->use_empty() && "Cannot delete a node that is not dead!"); + + // Drop all of the operands and decrement used node's use counts. + N->DropOperands(); + + DeallocateNode(N); +} + +void SelectionDAG::DeallocateNode(SDNode *N) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + + // Set the opcode to DELETED_NODE to help catch bugs when node + // memory is reallocated. + N->NodeType = ISD::DELETED_NODE; + + NodeAllocator.Deallocate(AllNodes.remove(N)); + + // Remove the ordering of this node. + Ordering->remove(N); + + // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. + ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N); + for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) + DbgVals[i]->setIsInvalidated(); +} + +/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that +/// correspond to it. This is useful when we're about to delete or repurpose +/// the node. We don't want future request for structurally identical nodes +/// to return N anymore. +bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { + bool Erased = false; + switch (N->getOpcode()) { + case ISD::HANDLENODE: return false; // noop. + case ISD::CONDCODE: + assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && + "Cond code doesn't exist!"); + Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0; + CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0; + break; + case ISD::ExternalSymbol: + Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); + break; + case ISD::TargetExternalSymbol: { + ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N); + Erased = TargetExternalSymbols.erase( + std::pair<std::string,unsigned char>(ESN->getSymbol(), + ESN->getTargetFlags())); + break; + } + case ISD::VALUETYPE: { + EVT VT = cast<VTSDNode>(N)->getVT(); + if (VT.isExtended()) { + Erased = ExtendedValueTypeNodes.erase(VT); + } else { + Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0; + ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0; + } + break; + } + default: + // Remove it from the CSE Map. + assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); + assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!"); + Erased = CSEMap.RemoveNode(N); + break; + } +#ifndef NDEBUG + // Verify that the node was actually in one of the CSE maps, unless it has a + // flag result (which cannot be CSE'd) or is one of the special cases that are + // not subject to CSE. 
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue && + !N->isMachineOpcode() && !doNotCSE(N)) { + N->dump(this); + dbgs() << "\n"; + llvm_unreachable("Node is not in map!"); + } +#endif + return Erased; +} + +/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE +/// maps and modified in place. Add it back to the CSE maps, unless an identical +/// node already exists, in which case transfer all its users to the existing +/// node. This transfer can potentially trigger recursive merging. +/// +void +SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { + // For node types that aren't CSE'd, just act as if no identical node + // already exists. + if (!doNotCSE(N)) { + SDNode *Existing = CSEMap.GetOrInsertNode(N); + if (Existing != N) { + // If there was already an existing matching node, use ReplaceAllUsesWith + // to replace the dead one with the existing one. This can cause + // recursive merging of other unrelated nodes down the line. + ReplaceAllUsesWith(N, Existing); + + // N is now dead. Inform the listeners and delete it. + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeDeleted(N, Existing); + DeleteNodeNotInCSEMaps(N); + return; + } + } + + // If the node doesn't already exist, we updated it. Inform listeners. + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeUpdated(N); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); + AddNodeIDCustom(ID, N); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + return Node; +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + SDValue Op1, SDValue Op2, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op1, Op2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); + AddNodeIDCustom(ID, N); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + return Node; +} + + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + const SDValue *Ops,unsigned NumOps, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); + AddNodeIDCustom(ID, N); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + return Node; +} + +#ifndef NDEBUG +/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid. 
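[Editor's note] Before the verification helpers below, a quick illustration of the find-or-insert protocol that the FindModifiedNodeSlot overloads above rely on: FindNodeOrInsertPos either returns an existing node with the same profile or hands back an insertion cookie that InsertNode can reuse. A minimal sketch with a hypothetical uniqued type (not part of this patch):

    #include "llvm/ADT/FoldingSet.h"
    using namespace llvm;

    struct UniquedInt : FoldingSetNode {      // hypothetical example node
      int Value;
      explicit UniquedInt(int V) : Value(V) {}
      void Profile(FoldingSetNodeID &ID) { ID.AddInteger(Value); }
    };

    static UniquedInt *getOrCreate(FoldingSet<UniquedInt> &Set, int V) {
      FoldingSetNodeID ID;
      ID.AddInteger(V);                       // must match Profile() exactly
      void *InsertPos = 0;
      if (UniquedInt *E = Set.FindNodeOrInsertPos(ID, InsertPos))
        return E;                             // CSE hit: reuse existing node
      UniquedInt *N = new UniquedInt(V);
      Set.InsertNode(N, InsertPos);           // reuse the probe position
      return N;
    }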
+static void VerifyNodeCommon(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + EVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + assert(I->getValueType() == N->getOperand(0).getValueType() && + "Operands must all have the same type"); + } + break; + } + } +} + +/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. +static void VerifySDNode(SDNode *N) { + // The SDNode allocators cannot be used to allocate nodes with fields that are + // not present in an SDNode! + assert(!isa<MemSDNode>(N) && "Bad MemSDNode!"); + assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!"); + assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!"); + assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!"); + assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!"); + assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!"); + assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!"); + assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!"); + assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!"); + assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!"); + assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!"); + assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!"); + assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!"); + assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!"); + assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!"); + assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!"); + assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!"); + assert(!isa<VTSDNode>(N) && "Bad VTSDNode!"); + assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!"); + + VerifyNodeCommon(N); +} + +/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is +/// invalid. +static void VerifyMachineNode(SDNode *N) { + // The MachineNode allocators cannot be used to allocate nodes with fields + // that are not present in a MachineNode! + // Currently there are no such nodes. + + VerifyNodeCommon(N); +} +#endif // NDEBUG + +/// getEVTAlignment - Compute the default alignment value for the +/// given type. +/// +unsigned SelectionDAG::getEVTAlignment(EVT VT) const { + Type *Ty = VT == MVT::iPTR ? 
+ PointerType::get(Type::getInt8Ty(*getContext()), 0) : + VT.getTypeForEVT(*getContext()); + + return TLI.getDataLayout()->getABITypeAlignment(Ty); +} + +// EntryNode could meaningfully have debug info if we can find it... +SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) + : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), + TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), + getVTList(MVT::Other)), + Root(getEntryNode()), Ordering(0), UpdateListeners(0) { + AllNodes.push_back(&EntryNode); + Ordering = new SDNodeOrdering(); + DbgInfo = new SDDbgInfo(); +} + +void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) { + MF = &mf; + TTI = tti; + Context = &mf.getFunction()->getContext(); +} + +SelectionDAG::~SelectionDAG() { + assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); + allnodes_clear(); + delete Ordering; + delete DbgInfo; +} + +void SelectionDAG::allnodes_clear() { + assert(&*AllNodes.begin() == &EntryNode); + AllNodes.remove(AllNodes.begin()); + while (!AllNodes.empty()) + DeallocateNode(AllNodes.begin()); +} + +void SelectionDAG::clear() { + allnodes_clear(); + OperandAllocator.Reset(); + CSEMap.clear(); + + ExtendedValueTypeNodes.clear(); + ExternalSymbols.clear(); + TargetExternalSymbols.clear(); + std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), + static_cast<CondCodeSDNode*>(0)); + std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), + static_cast<SDNode*>(0)); + + EntryNode.UseList = 0; + AllNodes.push_back(&EntryNode); + Root = getEntryNode(); + Ordering->clear(); + DbgInfo->clear(); +} + +SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::ANY_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::SIGN_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::ZERO_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { + assert(!VT.isVector() && + "getZeroExtendInReg should use the vector element type instead of " + "the vector type!"); + if (Op.getValueType() == VT) return Op; + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Imm = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + return getNode(ISD::AND, DL, Op.getValueType(), Op, + getConstant(Imm, Op.getValueType())); +} + +/// getNOT - Create a bitwise NOT operation as (XOR Val, -1). 
+/// +SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { + EVT EltVT = VT.getScalarType(); + SDValue NegOne = + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + return getNode(ISD::XOR, DL, VT, Val, NegOne); +} + +SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { + EVT EltVT = VT.getScalarType(); + assert((EltVT.getSizeInBits() >= 64 || + (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && + "getConstant with a uint64_t value that doesn't fit in the type!"); + return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { + return getConstant(*ConstantInt::get(*Context, Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { + assert(VT.isInteger() && "Cannot create FP integer constant!"); + + EVT EltVT = VT.getScalarType(); + const ConstantInt *Elt = &Val; + + // In some cases the vector type is legal but the element type is illegal and + // needs to be promoted, for example v8i8 on ARM. In this case, promote the + // inserted value (the type does not need to match the vector element type). + // Any extra bits introduced will be truncated away. + if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) == + TargetLowering::TypePromoteInteger) { + EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT); + APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); + Elt = ConstantInt::get(*getContext(), NewVal); + } + + assert(Elt->getBitWidth() == EltVT.getSizeInBits() && + "APInt size does not match type size!"); + unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(Elt); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + + if (!N) { + N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { + return getConstant(Val, TLI.getPointerTy(), isTarget); +} + + +SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget); +} + +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ + assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); + + EVT EltVT = VT.getScalarType(); + + // Do the map lookup using the actual bit pattern for the floating point + // value, so that we don't have problems with 0.0 comparing equal to -0.0, and + // we don't have issues with SNANs. + unsigned Opc = isTarget ? 
ISD::TargetConstantFP : ISD::ConstantFP; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&V); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + + if (!N) { + N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + // FIXME DebugLoc info might be appropriate here + Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { + EVT EltVT = VT.getScalarType(); + if (EltVT==MVT::f32) + return getConstantFP(APFloat((float)Val), VT, isTarget); + else if (EltVT==MVT::f64) + return getConstantFP(APFloat(Val), VT, isTarget); + else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 || + EltVT==MVT::f16) { + bool ignored; + APFloat apf = APFloat(Val); + apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + &ignored); + return getConstantFP(apf, VT, isTarget); + } else + llvm_unreachable("Unsupported type in getConstantFP"); +} + +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, + EVT VT, int64_t Offset, + bool isTargetGA, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTargetGA) && + "Cannot set target flags on target-independent globals"); + + // Truncate (with sign-extension) the offset value to the pointer size. + unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + if (BitWidth < 64) + Offset = SignExtend64(Offset, BitWidth); + + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + if (!GVar) { + // If GV is an alias then use the aliasee for determining thread-localness. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); + } + + unsigned Opc; + if (GVar && GVar->isThreadLocal()) + Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; + else + Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddPointer(GV); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + ID.AddInteger(GV->getType()->getAddressSpace()); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT, + Offset, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { + unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(FI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent jump tables"); + unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(JTI); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, + unsigned Alignment, int Offset, + bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent globals"); + if (Alignment == 0) + Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + ID.AddPointer(C); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, + unsigned Alignment, int Offset, + bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent globals"); + if (Alignment == 0) + Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + C->addSelectionDAGCSEId(ID); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, + unsigned char TargetFlags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0); + ID.AddInteger(Index); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MBB); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getValueType(EVT VT) { + if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= + ValueTypeNodes.size()) + ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); + + SDNode *&N = VT.isExtended() ? 
+    ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+  if (N) return SDValue(N, 0);
+  N = new (NodeAllocator) VTSDNode(VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+  SDNode *&N = ExternalSymbols[Sym];
+  if (N) return SDValue(N, 0);
+  N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+                                              unsigned char TargetFlags) {
+  SDNode *&N =
+    TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+                                                               TargetFlags)];
+  if (N) return SDValue(N, 0);
+  N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+  if ((unsigned)Cond >= CondCodeNodes.size())
+    CondCodeNodes.resize(Cond+1);
+
+  if (CondCodeNodes[Cond] == 0) {
+    CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
+    CondCodeNodes[Cond] = N;
+    AllNodes.push_back(N);
+  }
+
+  return SDValue(CondCodeNodes[Cond], 0);
+}
+
+// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in
+// the shuffle mask M that point at N1 to point at N2, and indices that point
+// at N2 to point at N1.
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+  std::swap(N1, N2);
+  int NElts = M.size();
+  for (int i = 0; i != NElts; ++i) {
+    if (M[i] >= NElts)
+      M[i] -= NElts;
+    else if (M[i] >= 0)
+      M[i] += NElts;
+  }
+}
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
+                                       SDValue N2, const int *Mask) {
+  assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
+  assert(VT.isVector() && N1.getValueType().isVector() &&
+         "Vector Shuffle VTs must be vectors");
+  assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
+         && "Vector Shuffle VTs must have same element type");
+
+  // Canonicalize shuffle undef, undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+    return getUNDEF(VT);
+
+  // Validate that all indices in Mask are within the range of the elements
+  // input to the shuffle.
+  unsigned NElts = VT.getVectorNumElements();
+  SmallVector<int, 8> MaskVec;
+  for (unsigned i = 0; i != NElts; ++i) {
+    assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
+    MaskVec.push_back(Mask[i]);
+  }
+
+  // Canonicalize shuffle v, v -> v, undef
+  if (N1 == N2) {
+    N2 = getUNDEF(VT);
+    for (unsigned i = 0; i != NElts; ++i)
+      if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+  }
+
+  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
+  if (N1.getOpcode() == ISD::UNDEF)
+    commuteShuffle(N1, N2, MaskVec);
+
+  // Canonicalize all indices into lhs, -> shuffle lhs, undef
+  // Canonicalize all indices into rhs, -> shuffle rhs, undef
+  bool AllLHS = true, AllRHS = true;
+  bool N2Undef = N2.getOpcode() == ISD::UNDEF;
+  for (unsigned i = 0; i != NElts; ++i) {
+    if (MaskVec[i] >= (int)NElts) {
+      if (N2Undef)
+        MaskVec[i] = -1;
+      else
+        AllLHS = false;
+    } else if (MaskVec[i] >= 0) {
+      AllRHS = false;
+    }
+  }
+  if (AllLHS && AllRHS)
+    return getUNDEF(VT);
+  if (AllLHS && !N2Undef)
+    N2 = getUNDEF(VT);
+  if (AllRHS) {
+    N1 = getUNDEF(VT);
+    commuteShuffle(N1, N2, MaskVec);
+  }
+
+  // If this is an identity shuffle, or if all elements shuffle to undef,
+  // return that node.
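[Editor's note] A concrete instance of the mask commutation above: for 4-element vectors, shuffle(A, B, {4,1,6,3}) selects B[0], A[1], B[2], A[3]; after commuteShuffle the equivalent canonical form is shuffle(B, A, {0,5,2,7}). A standalone sketch under the same convention (indices >= NElts select from the second operand, -1 is undef; hypothetical helper, not part of this patch):

    #include <vector>
    static void commuteMask(std::vector<int> &M) {
      const int NElts = static_cast<int>(M.size());
      for (size_t i = 0; i != M.size(); ++i) {
        if (M[i] >= NElts)
          M[i] -= NElts;        // pointed at RHS, now points at LHS
        else if (M[i] >= 0)
          M[i] += NElts;        // pointed at LHS, now points at RHS
      }                         // -1 (undef) is left alone
    }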
+ bool AllUndef = true; + bool Identity = true; + for (unsigned i = 0; i != NElts; ++i) { + if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + if (MaskVec[i] >= 0) AllUndef = false; + } + if (Identity && NElts == N1.getValueType().getVectorNumElements()) + return N1; + if (AllUndef) + return getUNDEF(VT); + + FoldingSetNodeID ID; + SDValue Ops[2] = { N1, N2 }; + AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); + for (unsigned i = 0; i != NElts; ++i) + ID.AddInteger(MaskVec[i]); + + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + // Allocate the mask array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the NodeAllocator is released. + int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); + memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); + + ShuffleVectorSDNode *N = + new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, + SDValue Val, SDValue DTy, + SDValue STy, SDValue Rnd, SDValue Sat, + ISD::CvtCode Code) { + // If the src and dest types are the same and the conversion is between + // integer types of the same sign or two floats, no conversion is necessary. + if (DTy == STy && + (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF)) + return Val; + + FoldingSetNodeID ID; + SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; + AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5, + Code); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); + ID.AddInteger(RegNo); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0); + ID.AddPointer(RegMask); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { + FoldingSetNodeID ID; + SDValue Ops[] = { Root }; + AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1); + ID.AddPointer(Label); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, + int64_t Offset, + bool isTarget, + unsigned char TargetFlags) { + unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddPointer(BA); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getSrcValue(const Value *V) { + assert((!V || V->getType()->isPointerTy()) && + "SrcValue is not a pointer?"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(V); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) SrcValueSDNode(V); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// getMDNode - Return an MDNodeSDNode which holds an MDNode. +SDValue SelectionDAG::getMDNode(const MDNode *MD) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MD); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +/// getShiftAmountOperand - Return the specified value casted to +/// the target's desired shift amount type. +SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { + EVT OpTy = Op.getValueType(); + EVT ShTy = TLI.getShiftAmountTy(LHSTy); + if (OpTy == ShTy || OpTy.isVector()) return Op; + + ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + return getNode(Opcode, Op.getDebugLoc(), ShTy, Op); +} + +/// CreateStackTemporary - Create a stack temporary, suitable for holding the +/// specified value type. +SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + unsigned ByteSize = VT.getStoreSize(); + Type *Ty = VT.getTypeForEVT(*getContext()); + unsigned StackAlign = + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign); + + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +/// CreateStackTemporary - Create a stack temporary suitable for holding +/// either of the specified value types. +SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { + unsigned Bytes = std::max(VT1.getStoreSizeInBits(), + VT2.getStoreSizeInBits())/8; + Type *Ty1 = VT1.getTypeForEVT(*getContext()); + Type *Ty2 = VT2.getTypeForEVT(*getContext()); + const DataLayout *TD = TLI.getDataLayout(); + unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), + TD->getPrefTypeAlignment(Ty2)); + + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, + SDValue N2, ISD::CondCode Cond, DebugLoc dl) { + // These setcc operations always fold. 
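[Editor's note] A minimal sketch of the integer half of the folding done in the switch below, with plain 64-bit values standing in for APInt (hypothetical helper, not part of this patch):

    #include <cstdint>
    enum MyCond { MY_SETEQ, MY_SETNE, MY_SETULT, MY_SETLT };  // stand-ins
    static bool foldIntSetCC(uint64_t C1, uint64_t C2, MyCond CC) {
      switch (CC) {
      case MY_SETEQ:  return C1 == C2;
      case MY_SETNE:  return C1 != C2;
      case MY_SETULT: return C1 < C2;                    // unsigned compare
      case MY_SETLT:  return (int64_t)C1 < (int64_t)C2;  // signed compare
      }
      return false;
    }

For instance foldIntSetCC(1, ~0ull, MY_SETULT) is true while foldIntSetCC(1, ~0ull, MY_SETLT) is false, which is exactly the signed/unsigned split the APInt calls below implement.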
+ switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return getConstant(1, VT); + + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETO: + case ISD::SETUO: + case ISD::SETUEQ: + case ISD::SETUNE: + assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!"); + break; + } + + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) { + const APInt &C2 = N2C->getAPIntValue(); + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + + switch (Cond) { + default: llvm_unreachable("Unknown integer setcc!"); + case ISD::SETEQ: return getConstant(C1 == C2, VT); + case ISD::SETNE: return getConstant(C1 != C2, VT); + case ISD::SETULT: return getConstant(C1.ult(C2), VT); + case ISD::SETUGT: return getConstant(C1.ugt(C2), VT); + case ISD::SETULE: return getConstant(C1.ule(C2), VT); + case ISD::SETUGE: return getConstant(C1.uge(C2), VT); + case ISD::SETLT: return getConstant(C1.slt(C2), VT); + case ISD::SETGT: return getConstant(C1.sgt(C2), VT); + case ISD::SETLE: return getConstant(C1.sle(C2), VT); + case ISD::SETGE: return getConstant(C1.sge(C2), VT); + } + } + } + if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) { + APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); + switch (Cond) { + default: break; + case ISD::SETEQ: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT); + case ISD::SETNE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, VT); + case ISD::SETLT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT); + case ISD::SETGT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT); + case ISD::SETLE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, VT); + case ISD::SETGE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, VT); + case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT); + case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT); + case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, VT); + case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT); + case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, VT); + case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, VT); + case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT); + case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT); + } + } else { + // Ensure that the constant occurs on the RHS. + return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + } + } + + // Could not fold it. + return SDValue(); +} + +/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. 
We +/// use this predicate to simplify operations downstream. +bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + // This predicate is not safe for vector operations. + if (Op.getValueType().isVector()) + return false; + + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, + unsigned Depth) const { + APInt KnownZero, KnownOne; + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bitsets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, unsigned Depth) const { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + + KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + if (Depth == 6) + return; // Limit search depth. + + APInt KnownZero2, KnownOne2; + + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; + return; + case ISD::AND: + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + return; + case ISD::OR: + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + case ISD::XOR: { + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. 
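[Editor's note] The AND/OR transfer functions above and the XOR assignment just below can be checked on a small example. With the convention that KnownZero/KnownOne are disjoint masks of known-0 and known-1 bits, a standalone sketch (hypothetical, not part of this patch):

    #include <cassert>
    #include <cstdint>
    struct Known { uint64_t Zero, One; };     // disjoint known-bit masks
    static Known knownAnd(Known A, Known B) {
      Known R = { A.Zero | B.Zero,            // a known 0 on either side wins
                  A.One  & B.One };           // 1 only if known 1 on both sides
      return R;
    }
    static Known knownXor(Known A, Known B) {
      Known R = { (A.Zero & B.Zero) | (A.One & B.One),    // equal known bits -> 0
                  (A.Zero & B.One)  | (A.One & B.Zero) }; // differing bits  -> 1
      return R;
    }
    int main() {
      Known A = { 0x1, 0x8 };                 // bit 0 known 0, bit 3 known 1
      Known B = { 0x8, 0x1 };                 // bit 3 known 0, bit 0 known 1
      assert(knownXor(A, B).One == 0x9);      // bits 0 and 3 differ -> known 1
      assert(knownAnd(A, B).Zero == 0x9);     // each side contributes a 0
      return 0;
    }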
+    KnownOne  = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case ISD::MUL: {
+    ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+    // More trickiness is possible, but this is sufficient for the
+    // interesting case of alignment computation.
+    KnownOne.clearAllBits();
+    unsigned TrailZ = KnownZero.countTrailingOnes() +
+                      KnownZero2.countTrailingOnes();
+    unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
+                               KnownZero2.countLeadingOnes(),
+                               BitWidth) - BitWidth;
+
+    TrailZ = std::min(TrailZ, BitWidth);
+    LeadZ = std::min(LeadZ, BitWidth);
+    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+                APInt::getHighBitsSet(BitWidth, LeadZ);
+    return;
+  }
+  case ISD::UDIV: {
+    // For the purposes of computing leading zeros we can conservatively
+    // treat a udiv as a logical right shift by the power of 2 known to
+    // be less than the denominator.
+    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+    unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+    KnownOne2.clearAllBits();
+    KnownZero2.clearAllBits();
+    ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+    unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+    if (RHSUnknownLeadingOnes != BitWidth)
+      LeadZ = std::min(BitWidth,
+                       LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+    KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+    return;
+  }
+  case ISD::SELECT:
+    ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SELECT_CC:
+    ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
+    if (Op.getResNo() != 1)
+      return;
+    // The boolean result conforms to getBooleanContents.  Fall through.
+  case ISD::SETCC:
+    // If we know the result of a setcc has the top bits zero, use this info.
+    if (TLI.getBooleanContents(Op.getValueType().isVector()) ==
+        TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1)
+      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    return;
+  case ISD::SHL:
+    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+
+      // If the shift count is an invalid immediate, don't do anything.
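[Editor's note] The MUL rule above is the one that matters in practice for alignment: trailing known-zero bits add across a multiply. If one operand is a multiple of 8 (three trailing zeros) and the other a multiple of 4 (two), the product is a multiple of 32. A sketch of that bookkeeping (hypothetical helpers, not part of this patch):

    #include <cstdint>
    // Length of the low run of known-zero bits in a KnownZero mask.
    static unsigned lowKnownZeros(uint64_t KnownZero, unsigned BitWidth) {
      unsigned N = 0;
      while (N < BitWidth && (KnownZero & (1ull << N)))
        ++N;
      return N;
    }
    // Known trailing zeros of a product, capped at the bit width.
    static unsigned mulTrailingZeros(uint64_t KZa, uint64_t KZb,
                                     unsigned BitWidth) {
      unsigned TZ = lowKnownZeros(KZa, BitWidth) + lowKnownZeros(KZb, BitWidth);
      return TZ < BitWidth ? TZ : BitWidth;
    }

For the 8x4 example, mulTrailingZeros(0x7, 0x3, 32) yields 5.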
+ if (ShAmt >= BitWidth) + return; + + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= ShAmt; + KnownOne <<= ShAmt; + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt); + } + return; + case ISD::SRL: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + KnownZero |= HighBits; // High bits known zero. + } + return; + case ISD::SRA: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bits. + APInt SignBit = APInt::getSignBit(BitWidth); + SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask. + + if (KnownZero.intersects(SignBit)) { + KnownZero |= HighBits; // New bits are known zero. + } else if (KnownOne.intersects(SignBit)) { + KnownOne |= HighBits; // New bits are known one. + } + } + return; + case ISD::SIGN_EXTEND_INREG: { + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned EBits = EVT.getScalarType().getSizeInBits(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); + + APInt InSignBit = APInt::getSignBit(EBits); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); + + // If the sign extended bits are demanded, we know that the sign + // bit is demanded. + InSignBit = InSignBit.zext(BitWidth); + if (NewBits.getBoolValue()) + InputDemandedBits |= InSignBit; + + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownOne &= InputDemandedBits; + KnownZero &= InputDemandedBits; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. 
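[Editor's note] The pattern here, both for SRA above and for SIGN_EXTEND_INREG below: once the (suitably shifted) sign bit is known, every bit above it becomes known as well. Worked at 8 bits for `sra x, 2` with x known to match 1???0000 (hypothetical check, not part of this patch):

    #include <cassert>
    #include <cstdint>
    int main() {
      uint8_t KnownOne = 0x80, KnownZero = 0x0F;  // x matches 1???0000
      unsigned BitWidth = 8, ShAmt = 2;
      uint8_t HighBits = (uint8_t)(0xFFu << (BitWidth - ShAmt)); // bits 7..6
      uint8_t One  = KnownOne  >> ShAmt;   // old sign bit lands on bit 5
      uint8_t Zero = KnownZero >> ShAmt;   // low zeros land on bits 1..0
      if (One & 0x20)                      // shifted sign bit known set
        One |= HighBits;                   // ...so the new top bits are 1
      assert(One == 0xE0 && Zero == 0x03); // result matches 111???00
      return 0;
    }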
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + return; + } + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTPOP: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownOne.clearAllBits(); + return; + } + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + // If this is a ZEXTLoad and we are looking at the loaded value. + if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { + EVT VT = LD->getMemoryVT(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + } else if (const MDNode *Ranges = LD->getRanges()) { + computeMaskedBitsLoad(*Ranges, KnownZero); + } + return; + } + case ISD::ZERO_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + KnownZero |= NewBits; + return; + } + case ISD::SIGN_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt InSignBit = APInt::getSignBit(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); + + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + + // Note if the sign bit is known to be zero or one. + bool SignBitKnownZero = KnownZero.isNegative(); + bool SignBitKnownOne = KnownOne.isNegative(); + assert(!(SignBitKnownZero && SignBitKnownOne) && + "Sign bit can't be known to be both zero and one!"); + + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + + // If the sign bit is known zero or one, the top bits match. 
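[Editor's note] ZERO_EXTEND above and SIGN_EXTEND here share one shape: truncate the known masks to the input width, recurse, re-extend, then account for the new bits (zext: known zero; sext: copies of the known sign bit, as assigned just below; anyext: unknown). The zext half at 8-to-16 bits (hypothetical check, not part of this patch):

    #include <cassert>
    #include <cstdint>
    int main() {
      uint16_t KnownZero8 = 0x00F0, KnownOne8 = 0x0000; // input matches 0x0?
      uint16_t NewBits = 0xFF00;                  // the eight new high bits
      uint16_t KnownZero = KnownZero8 | NewBits;  // zext: new bits known zero
      uint16_t KnownOne  = KnownOne8;
      assert(KnownZero == 0xFFF0 && KnownOne == 0);
      return 0;
    }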
+ if (SignBitKnownZero) + KnownZero |= NewBits; + else if (SignBitKnownOne) + KnownOne |= NewBits; + return; + } + case ISD::ANY_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + return; + } + case ISD::TRUNCATE: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + KnownZero = KnownZero.zext(InBits); + KnownOne = KnownOne.zext(InBits); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + break; + } + case ISD::AssertZext: { + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero |= (~InMask); + KnownOne &= (~KnownZero); + return; + } + case ISD::FGETSIGN: + // All bits are zero except the low bit. + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + + case ISD::SUB: { + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (CLHS->getAPIntValue().isNonNegative()) { + unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); + } + } + } + } + // fall through + case ISD::ADD: + case ISD::ADDE: { + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear. + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + KnownZeroOut = std::min(KnownZeroOut, + KnownZero2.countTrailingOnes()); + + if (Op.getOpcode() == ISD::ADD) { + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + return; + } + + // With ADDE, a carry bit may be added in, so we can only use this + // information if we know (at least) that the low two bits are clear. We + // then return to the caller that the low bit is unknown but that other bits + // are known zero. 
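[Editor's note] The ADD half of the rule above, made concrete: 8 + (X << 3) always has its low three bits clear, because both addends do and no carry can propagate into bit positions below the lowest possibly-set bit. An exhaustive 8-bit check (hypothetical, not part of this patch):

    #include <cassert>
    #include <cstdint>
    int main() {
      for (unsigned X = 0; X != 256; ++X) {
        uint8_t V = (uint8_t)(8 + (X << 3));
        assert((V & 0x7) == 0);        // low 3 bits always clear
      }
      return 0;
    }

The ADDE case just below is the same idea weakened by one bit, since an incoming carry can set the lowest position.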
+ if (KnownZeroOut >= 2) // ADDE + KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut); + return; + } + case ISD::SREM: + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue().abs(); + if (RA.isPowerOf2()) { + APInt LowBits = RA - 1; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); + + // The low bits of the first operand are unchanged by the srem. + KnownZero = KnownZero2 & LowBits; + KnownOne = KnownOne2 & LowBits; + + // If the first operand is non-negative or has all low bits zero, then + // the upper bits are all zero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero |= ~LowBits; + + // If the first operand is negative and not all low bits are zero, then + // the upper bits are all one. + if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + KnownOne |= ~LowBits; + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + } + } + return; + case ISD::UREM: { + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + KnownZero |= ~LowBits; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + + uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clearAllBits(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); + return; + } + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + if (unsigned Align = InferPtrAlignment(Op)) { + // The low bits are known zero if the pointer is aligned. + KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align)); + return; + } + break; + + default: + if (Op.getOpcode() < ISD::BUILTIN_OP_END) + break; + // Fallthrough + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: + // Allow the target to implement this method for its nodes. + TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); + return; + } +} + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "SRA X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + EVT VT = Op.getValueType(); + assert(VT.isInteger() && "Invalid VT!"); + unsigned VTBits = VT.getScalarType().getSizeInBits(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + if (Depth == 6) + return 1; // Limit search depth. 
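[Editor's note] To make the doc comment's example above concrete at 8 bits: after `sra x, 2` the top three bits of the result are all copies of x's sign bit, so the result always has at least three sign bits. A brute-force check (hypothetical, not part of this patch):

    #include <cassert>
    #include <cstdint>
    static unsigned numSignBits8(int8_t V) {
      unsigned N = 1;                   // the sign bit always matches itself
      for (int B = 6; B >= 0 && (((V >> B) & 1) == ((V >> 7) & 1)); --B)
        ++N;
      return N;
    }
    int main() {
      // X >> 2 is an arithmetic shift on essentially all implementations.
      for (int X = -128; X != 128; ++X)
        assert(numSignBits8((int8_t)(X >> 2)) >= 3);
      return 0;
    }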
+ + switch (Op.getOpcode()) { + default: break; + case ISD::AssertSext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::AssertZext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp; + + case ISD::Constant: { + const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue(); + return Val.getNumSignBits(); + } + + case ISD::SIGN_EXTEND: + Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; + + case ISD::SIGN_EXTEND_INREG: + // Max of the input and what this extends. + Tmp = + cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits(); + Tmp = VTBits-Tmp+1; + + Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); + return std::max(Tmp, Tmp2); + + case ISD::SRA: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + // SRA X, C -> adds C sign bits. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + Tmp += C->getZExtValue(); + if (Tmp > VTBits) Tmp = VTBits; + } + return Tmp; + case ISD::SHL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (C->getZExtValue() >= VTBits || // Bad shift. + C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + return Tmp - C->getZExtValue(); + } + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // ComputeMaskedBits, and pick whichever answer is better. + } + break; + + case ISD::SELECT: + Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); + return std::min(Tmp, Tmp2); + + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + break; + // The boolean result conforms to getBooleanContents. Fall through. + case ISD::SETCC: + // If setcc returns 0/-1, all bits are sign bits. + if (TLI.getBooleanContents(Op.getValueType().isVector()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RotAmt = C->getZExtValue() & (VTBits-1); + + // Handle rotate right by N like a rotate left by 32-N. + if (Op.getOpcode() == ISD::ROTR) + RotAmt = (VTBits-RotAmt) & (VTBits-1); + + // If we aren't rotating out all of the known-in sign bits, return the + // number that are left. This handles rotl(sext(x), 1) for example. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp > RotAmt+1) return Tmp-RotAmt; + } + break; + case ISD::ADD: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. 
+ + // Special case decrementing a value (ADD X, -1): + if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + if (CRHS->isAllOnesValue()) { + APInt KnownZero, KnownOne; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) + return VTBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero.isNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + + case ISD::SUB: + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (CLHS->isNullValue()) { + APInt KnownZero, KnownOne; + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) + return VTBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero.isNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + case ISD::TRUNCATE: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + } + + // If we are looking at the loaded value of the SDNode. + if (Op.getResNo() == 0) { + // Handle LOADX separately here. EXTLOAD case will fallthrough. + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp; + } + } + } + + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) { + unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + APInt KnownZero, KnownOne; + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + + APInt Mask; + if (KnownZero.isNegative()) { // sign bit is 0 + Mask = KnownZero; + } else if (KnownOne.isNegative()) { // sign bit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return FirstAnswer; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask = ~Mask; + Mask <<= Mask.getBitWidth()-VTBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". 
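[Editor's note] The mask trick described above, worked at 8 bits: if the top three bits are known one (Mask = KnownOne = 0b11100000), then ~Mask = 0b00011111 and its leading-zero count, 3, is exactly the number of known sign bits. Hypothetical check (not part of this patch):

    #include <cassert>
    #include <cstdint>
    static unsigned clz8(uint8_t V) {     // leading zeros; 8 for V == 0
      unsigned N = 0;
      for (int B = 7; B >= 0 && !((V >> B) & 1); --B)
        ++N;
      return N;
    }
    int main() {
      uint8_t Mask = (uint8_t)~0xE0;      // complement of the known-one mask
      assert(clz8(Mask) == 3);            // three replicated sign bits
      return 0;
    }

The std::min against VTBits in the return below is the guard the comment mentions: it caps the count at the value's own width in the degenerate all-bits-known case.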
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
+
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD. This handles the equivalence:
+/// X|Cst == X+Cst iff X&Cst = 0.
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0),
+ cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ return false;
+
+ return true;
+}
+
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+ // If we're told that NaNs won't happen, assume they won't.
+ if (getTarget().Options.NoNaNsFPMath)
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->getValueAPF().isNaN();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+
+ // TODO: Recognize more cases here.
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return !C->isNullValue();
+ break;
+ }
+
+ return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+ // Check for negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+ EVT VT, SDValue Operand) {
+ // Constant fold unary operations with an integer constant operand.
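// [Aside -- illustration, not LLVM source] The folds below each reduce to
// a single APInt call; e.g. for a constant i16 0x1234:
//   APInt V(16, 0x1234);
//   V.byteSwap();           // ISD::BSWAP -> 0x3412
//   V.countPopulation();    // ISD::CTPOP -> 5
//   V.countLeadingZeros();  // ISD::CTLZ  -> 3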
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { + const APInt &Val = C->getAPIntValue(); + switch (Opcode) { + default: break; + case ISD::SIGN_EXTEND: + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT); + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::TRUNCATE: + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: { + APFloat apf(EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); + (void)apf.convertFromAPInt(Val, + Opcode==ISD::SINT_TO_FP, + APFloat::rmNearestTiesToEven); + return getConstantFP(apf, VT); + } + case ISD::BITCAST: + if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) + return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); + else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); + break; + case ISD::BSWAP: + return getConstant(Val.byteSwap(), VT); + case ISD::CTPOP: + return getConstant(Val.countPopulation(), VT); + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + return getConstant(Val.countLeadingZeros(), VT); + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + return getConstant(Val.countTrailingZeros(), VT); + } + } + + // Constant fold unary operations with a floating point constant operand. + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) { + APFloat V = C->getValueAPF(); // make copy + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + integerPart x[2]; + bool ignored; + assert(integerPartWidth >= 64); + // FIXME need to be more flexible about rounding mode. + APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), + Opcode==ISD::FP_TO_SINT, + APFloat::rmTowardZero, &ignored); + if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual + break; + APInt api(VT.getSizeInBits(), x); + return getConstant(api, VT); + } + case ISD::BITCAST: + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; + } + } + + unsigned OpOpcode = Operand.getNode()->getOpcode(); + switch (Opcode) { + case ISD::TokenFactor: + case ISD::MERGE_VALUES: + case ISD::CONCAT_VECTORS: + return Operand; // Factor, merge or concat of one node? No need. 
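// [Aside -- illustration, not LLVM source] The APFloat folds above share
// one pattern: copy the constant, apply the operation under an explicit
// rounding mode, and keep the result only for benign statuses. E.g.:
//   APFloat V(2.5f);
//   APFloat::opStatus S = V.roundToIntegral(APFloat::rmTowardPositive);
//   // S == APFloat::opInexact, V is now 3.0 -- accepted for ISD::FCEIL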
+ case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node"); + case ISD::FP_EXTEND: + assert(VT.isFloatingPoint() && + Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); + if (Operand.getValueType() == VT) return Operand; // noop conversion. + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + if (Operand.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::SIGN_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid SIGN_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid sext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // sext(undef) = 0, because the top bits will all be the same. + return getConstant(0, VT); + break; + case ISD::ZERO_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ZERO_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid zext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) + return getNode(ISD::ZERO_EXTEND, DL, VT, + Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // zext(undef) = 0, because the top bits will be zero. 
+ return getConstant(0, VT);
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid anyext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // (ext (trunc x)) -> x
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = Operand.getNode()->getOperand(0);
+ if (OpOp.getValueType() == VT)
+ return OpOp;
+ }
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+ "Invalid truncate node, src < dst!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+ .bitsLT(VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ return Operand.getNode()->getOperand(0);
+ }
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::BITCAST:
+ // Basic sanity checking.
+ assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+ && "Cannot BITCAST between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !Operand.getValueType().isVector() &&
+ (VT.getVectorElementType() == Operand.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ Operand.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
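// [Aside -- illustration, not LLVM source] For the fold below: only lane 0
// of a SCALAR_TO_VECTOR result is defined, so re-packing lane 0 of a
// same-typed vector is the vector itself:
//   (v4i32 scalar_to_vector (extract_vector_elt (v4i32 V), 0)) -> V
// The index must be the constant 0; any other lane would land in the
// undefined upper portion.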
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Operand.getOperand(1)) && + Operand.getConstantOperandVal(1) == 0 && + Operand.getOperand(0).getValueType() == VT) + return Operand.getOperand(0); + break; + case ISD::FNEG: + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 + if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), + Operand.getNode()->getOperand(0)); + if (OpOpcode == ISD::FNEG) // --X -> X + return Operand.getNode()->getOperand(0); + break; + case ISD::FABS: + if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) + return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0)); + break; + } + + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Glue) { // Don't CSE flag producing nodes + FoldingSetNodeID ID; + SDValue Ops[1] = { Operand }; + AddNodeIDNode(ID, Opcode, VTs, Ops, 1); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifySDNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, + SDNode *Cst1, SDNode *Cst2) { + SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs; + SmallVector<SDValue, 4> Outputs; + EVT SVT = VT.getScalarType(); + + ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1); + ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2); + if (Scalar1 && Scalar2) { + // Scalar instruction. + Inputs.push_back(std::make_pair(Scalar1, Scalar2)); + } else { + // For vectors extract each constant element into Inputs so we can constant + // fold them individually. + BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); + BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); + if (!BV1 || !BV2) + return SDValue(); + + assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); + + for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { + ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); + ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); + if (!V1 || !V2) // Not a constant, bail. + return SDValue(); + + // Avoid BUILD_VECTOR nodes that perform implicit truncation. + // FIXME: This is valid and could be handled by truncating the APInts. + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); + + Inputs.push_back(std::make_pair(V1, V2)); + } + } + + // We have a number of constant values, constant fold them element by element. 
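// [Aside -- illustration, not LLVM source] E.g. for
// (add (v2i32 <1, 2>), (v2i32 <3, 4>)), Inputs holds the pairs (1,3) and
// (2,4); the loop below folds them to the scalar constants 4 and 6, and
// the result is rebuilt as the single BUILD_VECTOR <4, 6>.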
+ for (unsigned I = 0, E = Inputs.size(); I != E; ++I) { + const APInt &C1 = Inputs[I].first->getAPIntValue(); + const APInt &C2 = Inputs[I].second->getAPIntValue(); + + switch (Opcode) { + case ISD::ADD: + Outputs.push_back(getConstant(C1 + C2, SVT)); + break; + case ISD::SUB: + Outputs.push_back(getConstant(C1 - C2, SVT)); + break; + case ISD::MUL: + Outputs.push_back(getConstant(C1 * C2, SVT)); + break; + case ISD::UDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.udiv(C2), SVT)); + break; + case ISD::UREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.urem(C2), SVT)); + break; + case ISD::SDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.sdiv(C2), SVT)); + break; + case ISD::SREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.srem(C2), SVT)); + break; + case ISD::AND: + Outputs.push_back(getConstant(C1 & C2, SVT)); + break; + case ISD::OR: + Outputs.push_back(getConstant(C1 | C2, SVT)); + break; + case ISD::XOR: + Outputs.push_back(getConstant(C1 ^ C2, SVT)); + break; + case ISD::SHL: + Outputs.push_back(getConstant(C1 << C2, SVT)); + break; + case ISD::SRL: + Outputs.push_back(getConstant(C1.lshr(C2), SVT)); + break; + case ISD::SRA: + Outputs.push_back(getConstant(C1.ashr(C2), SVT)); + break; + case ISD::ROTL: + Outputs.push_back(getConstant(C1.rotl(C2), SVT)); + break; + case ISD::ROTR: + Outputs.push_back(getConstant(C1.rotr(C2), SVT)); + break; + default: + return SDValue(); + } + } + + // Handle the scalar case first. + if (Outputs.size() == 1) + return Outputs.back(); + + // Otherwise build a big vector out of the scalar elements we generated. + return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(), + Outputs.size()); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, + SDValue N2) { + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + switch (Opcode) { + default: break; + case ISD::TokenFactor: + assert(VT == MVT::Other && N1.getValueType() == MVT::Other && + N2.getValueType() == MVT::Other && "Invalid token factor!"); + // Fold trivial token factors. + if (N1.getOpcode() == ISD::EntryToken) return N2; + if (N2.getOpcode() == ISD::EntryToken) return N1; + if (N1 == N2) return N1; + break; + case ISD::CONCAT_VECTORS: + // Concat of UNDEFs is UNDEF. + if (N1.getOpcode() == ISD::UNDEF && + N2.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to + // one big BUILD_VECTOR. + if (N1.getOpcode() == ISD::BUILD_VECTOR && + N2.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), + N1.getNode()->op_end()); + Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + } + break; + case ISD::AND: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's + // worth handling here. 
+ if (N2C && N2C->isNullValue())
+ return N2;
+ if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (getTarget().Options.UnsafeFPMath) {
+ if (Opcode == ISD::FADD) {
+ // 0+x --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+ if (CFP->getValueAPF().isZero())
+ return N2;
+ // x+0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FSUB) {
+ // x-0 --> x
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+ if (CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FMUL) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1);
+ SDValue V = N2;
+
+ // If the first operand isn't the constant, try the second
+ if (!CFP) {
+ CFP = dyn_cast<ConstantFPSDNode>(N2);
+ V = N1;
+ }
+
+ if (CFP) {
+ // 0*x --> 0
+ if (CFP->isZero())
+ return SDValue(CFP,0);
+ // 1*x --> x
+ if (CFP->isExactlyValue(1.0))
+ return V;
+ }
+ }
+ }
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(VT == N1.getValueType() &&
+ "Shift operators' return type must be the same as their first arg");
+ assert(VT.isInteger() && N2.getValueType().isInteger() &&
+ "Shifts only work on integers");
+ assert((!VT.isVector() || VT == N2.getValueType()) &&
+ "Vector shift amounts must have the same type as their first arg");
+ // Verify that the shift amount VT is big enough to hold valid shift
+ // amounts. This catches things like trying to shift an i1024 value by an
+ // i8, which is easy to fall into in generic code that uses
+ // TLI.getShiftAmount().
+ assert(N2.getValueType().getSizeInBits() >=
+ Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
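// [Aside -- illustration, not LLVM source] Concretely: a well-defined
// shift amount is < bitwidth, and for i1 that means < 1, i.e. zero, so
// the check below can return the value unchanged. The width assert above
// catches the opposite mistake -- shifting an i1024 by an i8 amount --
// because 8 bits cannot encode the maximal valid amount 1023
// (Log2_32_Ceil(1024) == 10).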
+ if (VT == MVT::i1) + return N1; + if (N2C && N2C->isNullValue()) + return N1; + break; + case ISD::FP_ROUND_INREG: { + EVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg round!"); + assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && + "Cannot FP_ROUND_INREG integer types"); + assert(EVT.isVector() == VT.isVector() && + "FP_ROUND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in FP_ROUND_INREG"); + assert(EVT.bitsLE(VT) && "Not rounding down!"); + (void)EVT; + if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. + break; + } + case ISD::FP_ROUND: + assert(VT.isFloatingPoint() && + N1.getValueType().isFloatingPoint() && + VT.bitsLE(N1.getValueType()) && + isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!"); + if (N1.getValueType() == VT) return N1; // noop conversion. + break; + case ISD::AssertSext: + case ISD::AssertZext: { + EVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(!EVT.isVector() && + "AssertSExt/AssertZExt type should be the vector element type " + "rather than the vector type!"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (VT == EVT) return N1; // noop assertion. + break; + } + case ISD::SIGN_EXTEND_INREG: { + EVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT.isVector() == VT.isVector() && + "SIGN_EXTEND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in SIGN_EXTEND_INREG"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (EVT == VT) return N1; // Not actually extending + + if (N1C) { + APInt Val = N1C->getAPIntValue(); + unsigned FromBits = EVT.getScalarType().getSizeInBits(); + Val <<= Val.getBitWidth()-FromBits; + Val = Val.ashr(Val.getBitWidth()-FromBits); + return getConstant(Val, VT); + } + break; + } + case ISD::EXTRACT_VECTOR_ELT: + // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. + if (N1.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is + // expanding copies of large vectors from registers. + if (N2C && + N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0) { + unsigned Factor = + N1.getOperand(0).getValueType().getVectorNumElements(); + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + N1.getOperand(N2C->getZExtValue() / Factor), + getConstant(N2C->getZExtValue() % Factor, + N2.getValueType())); + } + + // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is + // expanding large vector constants. + if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { + SDValue Elt = N1.getOperand(N2C->getZExtValue()); + + if (VT != Elt.getValueType()) + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated, and the result implicitly + // extended. Make that explicit here. + Elt = getAnyExtOrTrunc(Elt, DL, VT); + + return Elt; + } + + // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector + // operations are lowered to scalars. 
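// [Aside -- illustration, not LLVM source] For the fold below: given
// W = insert_vector_elt(V, X, i) and a constant extract index j,
// extract_vector_elt(W, j) is X (adjusted to VT if the types differ) when
// i == j, and extract_vector_elt(V, j) when i != j. With a non-constant
// index the fold is skipped, since i may or may not equal j.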
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) { + // If the indices are the same, return the inserted element else + // if the indices are known different, extract the element from + // the original vector. + SDValue N1Op2 = N1.getOperand(2); + ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode()); + + if (N1Op2C && N2C) { + if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { + if (VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); + else + return getSExtOrTrunc(N1.getOperand(1), DL, VT); + } + + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); + } + } + break; + case ISD::EXTRACT_ELEMENT: + assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!"); + assert(!N1.getValueType().isVector() && !VT.isVector() && + (N1.getValueType().isInteger() == VT.isInteger()) && + N1.getValueType() != VT && + "Wrong types for EXTRACT_ELEMENT!"); + + // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding + // 64-bit integers into 32-bit parts. Instead of building the extract of + // the BUILD_PAIR, only to have legalize rip it apart, just do it now. + if (N1.getOpcode() == ISD::BUILD_PAIR) + return N1.getOperand(N2C->getZExtValue()); + + // EXTRACT_ELEMENT of a constant int is also very common. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + unsigned ElementSize = VT.getSizeInBits(); + unsigned Shift = ElementSize * N2C->getZExtValue(); + APInt ShiftedVal = C->getAPIntValue().lshr(Shift); + return getConstant(ShiftedVal.trunc(ElementSize), VT); + } + break; + case ISD::EXTRACT_SUBVECTOR: { + SDValue Index = N2; + if (VT.isSimple() && N1.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + "Extract subvector VTs must be a vectors!"); + assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && + "Extract subvector VTs must have the same element type!"); + assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() && + "Extract subvector must be from larger vector to smaller vector!"); + + if (isa<ConstantSDNode>(Index.getNode())) { + assert((VT.getVectorNumElements() + + cast<ConstantSDNode>(Index.getNode())->getZExtValue() + <= N1.getValueType().getVectorNumElements()) + && "Extract subvector overflow!"); + } + + // Trivial extraction. + if (VT.getSimpleVT() == N1.getValueType().getSimpleVT()) + return N1; + } + break; + } + } + + // Perform trivial constant folding. + SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()); + if (SV.getNode()) return SV; + + // Canonicalize constant to RHS if commutative. + if (N1C && !N2C && isCommutativeBinOp(Opcode)) { + std::swap(N1C, N2C); + std::swap(N1, N2); + } + + // Constant fold FP operations. + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); + if (N1CFP) { + if (!N2CFP && isCommutativeBinOp(Opcode)) { + // Canonicalize constant to RHS if commutative. 
+ std::swap(N1CFP, N2CFP); + std::swap(N1, N2); + } else if (N2CFP) { + APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); + APFloat::opStatus s; + switch (Opcode) { + case ISD::FADD: + s = V1.add(V2, APFloat::rmNearestTiesToEven); + if (s != APFloat::opInvalidOp) + return getConstantFP(V1, VT); + break; + case ISD::FSUB: + s = V1.subtract(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp) + return getConstantFP(V1, VT); + break; + case ISD::FMUL: + s = V1.multiply(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp) + return getConstantFP(V1, VT); + break; + case ISD::FDIV: + s = V1.divide(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + return getConstantFP(V1, VT); + break; + case ISD::FREM : + s = V1.mod(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + return getConstantFP(V1, VT); + break; + case ISD::FCOPYSIGN: + V1.copySign(V2); + return getConstantFP(V1, VT); + default: break; + } + } + + if (Opcode == ISD::FP_ROUND) { + APFloat V = N1CFP->getValueAPF(); // make copy + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } + } + + // Canonicalize an UNDEF to the RHS, even over a constant. + if (N1.getOpcode() == ISD::UNDEF) { + if (isCommutativeBinOp(Opcode)) { + std::swap(N1, N2); + } else { + switch (Opcode) { + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: + case ISD::SUB: + case ISD::FSUB: + case ISD::FDIV: + case ISD::FREM: + case ISD::SRA: + return N1; // fold op(undef, arg2) -> undef + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::SRL: + case ISD::SHL: + if (!VT.isVector()) + return getConstant(0, VT); // fold op(undef, arg2) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. + return N2; + } + } + } + + // Fold a bunch of operators when the RHS is undef. + if (N2.getOpcode() == ISD::UNDEF) { + switch (Opcode) { + case ISD::XOR: + if (N1.getOpcode() == ISD::UNDEF) + // Handle undef ^ undef -> 0 special case. This is a common + // idiom (misuse). + return getConstant(0, VT); + // fallthrough + case ISD::ADD: + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUB: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + return N2; // fold op(arg1, undef) -> undef + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (getTarget().Options.UnsafeFPMath) + return N2; + break; + case ISD::MUL: + case ISD::AND: + case ISD::SRL: + case ISD::SHL: + if (!VT.isVector()) + return getConstant(0, VT); // fold op(arg1, undef) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. + return N1; + case ISD::OR: + if (!VT.isVector()) + return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + // For vectors, we can't easily build an all one vector, just return + // the LHS. + return N1; + case ISD::SRA: + return N1; + } + } + + // Memoize this node if possible. 
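// [Aside -- illustration, not LLVM source] The memoization below is
// SelectionDAG's CSE: FoldingSetNodeID hashes (opcode, value-type list,
// operands), so a second getNode(ISD::ADD, DL, VT, A, B) returns the
// SDNode created by the first. MVT::Glue producers are deliberately
// exempt -- glue edges pin a node to one specific neighbor and must not
// be shared.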
+ SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Glue) { + SDValue Ops[] = { N1, N2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifySDNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3) { + // Perform various simplifications. + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + switch (Opcode) { + case ISD::CONCAT_VECTORS: + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to + // one big BUILD_VECTOR. + if (N1.getOpcode() == ISD::BUILD_VECTOR && + N2.getOpcode() == ISD::BUILD_VECTOR && + N3.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), + N1.getNode()->op_end()); + Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end()); + return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + } + break; + case ISD::SETCC: { + // Use FoldSetCC to simplify SETCC's. + SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL); + if (Simp.getNode()) return Simp; + break; + } + case ISD::SELECT: + if (N1C) { + if (N1C->getZExtValue()) + return N2; // select true, X, Y -> X + return N3; // select false, X, Y -> Y + } + + if (N2 == N3) return N2; // select C, X, X -> X + break; + case ISD::VECTOR_SHUFFLE: + llvm_unreachable("should use getVectorShuffle constructor!"); + case ISD::INSERT_SUBVECTOR: { + SDValue Index = N3; + if (VT.isSimple() && N1.getValueType().isSimple() + && N2.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + N2.getValueType().isVector() && + "Insert subvector VTs must be a vectors"); + assert(VT == N1.getValueType() && + "Dest and insert subvector source types must match!"); + assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && + "Insert subvector must be from smaller vector to larger vector!"); + if (isa<ConstantSDNode>(Index.getNode())) { + assert((N2.getValueType().getVectorNumElements() + + cast<ConstantSDNode>(Index.getNode())->getZExtValue() + <= VT.getVectorNumElements()) + && "Insert subvector overflow!"); + } + + // Trivial insertion. + if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) + return N2; + } + break; + } + case ISD::BITCAST: + // Fold bit_convert nodes from a type to themselves. + if (N1.getValueType() == VT) + return N1; + break; + } + + // Memoize node if it doesn't produce a flag. 
+ SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Glue) { + SDValue Ops[] = { N1, N2, N3 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifySDNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VT, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VT, Ops, 5); +} + +/// getStackArgumentTokenFactor - Compute a TokenFactor to force all +/// the incoming stack arguments to be loaded from the stack. +SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { + SmallVector<SDValue, 8> ArgChains; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. + ArgChains.push_back(Chain); + + // Add a chain value for each stack argument. + for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), + UE = getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) + if (FI->getIndex() < 0) + ArgChains.push_back(SDValue(L, 1)); + + // Build a tokenfactor for all the chains. + return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + &ArgChains[0], ArgChains.size()); +} + +/// getMemsetValue - Vectorized representation of the memset value +/// operand. +static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, + DebugLoc dl) { + assert(Value.getOpcode() != ISD::UNDEF); + + unsigned NumBits = VT.getScalarType().getSizeInBits(); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { + assert(C->getAPIntValue().getBitWidth() == 8); + APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); + if (VT.isInteger()) + return DAG.getConstant(Val, VT); + return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); + } + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); + if (NumBits > 8) { + // Use a multiplication with 0x010101... to extend the input to the + // required length. + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); + Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); + } + + return Value; +} + +/// getMemsetStringVal - Similar to getMemsetValue. Except this is only +/// used when a memcpy is turned into a memset when the source is a constant +/// string ptr. +static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, + const TargetLowering &TLI, StringRef Str) { + // Handle vector with all elements zero. + if (Str.empty()) { + if (VT.isInteger()) + return DAG.getConstant(0, VT); + else if (VT == MVT::f32 || VT == MVT::f64) + return DAG.getConstantFP(0.0, VT); + else if (VT.isVector()) { + unsigned NumElts = VT.getVectorNumElements(); + MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? 
+ MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElts)));
+ } else
+ llvm_unreachable("Expected type!");
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumVTBits = VT.getSizeInBits();
+ unsigned NumVTBytes = NumVTBits / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+
+ APInt Val(NumVTBits, 0);
+ if (TLI.isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
+ }
+
+ // If the "cost" of materializing the integer immediate is 1 or free, then
+ // it is cost effective to turn the load into the immediate.
+ const TargetTransformInfo *TTI = DAG.getTargetTransformInfo();
+ if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2)
+ return DAG.getConstant(Val, VT);
+ return SDValue(0, 0);
+}
+
+/// getMemBasePlusOffset - Returns an ISD::ADD node computing Base plus the
+/// given constant byte Offset.
+///
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ SelectionDAG &DAG) {
+ EVT VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+ VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false);
+}
+
+/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
+/// to replace the memset / memcpy. Return true if the number of memory ops
+/// is below the threshold. It returns the types of the sequence of
+/// memory ops to perform memset / memcpy by reference.
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ bool AllowOverlap,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+ "Expecting memcpy / memset source to meet alignment requirement!");
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
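// [Aside -- illustration, not LLVM source] Sketch of a typical outcome
// (target-dependent): for a 16-byte memset with DstAlign == 8 and
// SrcAlign == 0, if the target returns MVT::Other below, the fallback
// picks an 8-byte type (the pointer type, or i64 via the DstAlign & 7
// switch), giving MemOps == {i64, i64}; with DstAlign == 4 the same
// fallback degrades to four i32 stores.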
+ EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, + IsMemset, ZeroMemset, MemcpyStrSrc, + DAG.getMachineFunction()); + + if (VT == MVT::Other) { + if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() || + TLI.allowsUnalignedMemoryAccesses(VT)) { + VT = TLI.getPointerTy(); + } else { + switch (DstAlign & 7) { + case 0: VT = MVT::i64; break; + case 4: VT = MVT::i32; break; + case 2: VT = MVT::i16; break; + default: VT = MVT::i8; break; + } + } + + MVT LVT = MVT::i64; + while (!TLI.isTypeLegal(LVT)) + LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); + assert(LVT.isInteger()); + + if (VT.bitsGT(LVT)) + VT = LVT; + } + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned VTSize = VT.getSizeInBits() / 8; + while (VTSize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. + EVT NewVT = VT; + unsigned NewVTSize; + + bool Found = false; + if (VT.isVector() || VT.isFloatingPoint()) { + NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; + if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) && + TLI.isSafeMemOpType(NewVT.getSimpleVT())) + Found = true; + else if (NewVT == MVT::i64 && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) && + TLI.isSafeMemOpType(MVT::f64)) { + // i64 is usually not legal on 32-bit targets, but f64 may be. + NewVT = MVT::f64; + Found = true; + } + } + + if (!Found) { + do { + NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); + if (NewVT == MVT::i8) + break; + } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT())); + } + NewVTSize = NewVT.getSizeInBits() / 8; + + // If the new VT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + // FIXME: Only does this for 64-bit or more since we don't have proper + // cost model for unaligned load / store. + bool Fast; + if (NumMemOps && AllowOverlap && + VTSize >= 8 && NewVTSize < Size && + TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast) + VTSize = Size; + else { + VT = NewVT; + VTSize = NewVTSize; + } + } + + if (++NumMemOps > Limit) + return false; + + MemOps.push_back(VT); + Size -= VTSize; + } + + return true; +} + +static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool isVol, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Turn a memcpy of undef to nop. + if (Src.getOpcode() == ISD::UNDEF) + return Chain; + + // Expand memcpy to a series of load and store ops if the size operand falls + // below a certain threshold. + // TODO: In the AlwaysInline case, if the size is big then generate a loop + // rather than maybe a humongous number of loads and stores. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::vector<EVT> MemOps; + bool DstAlignCanChange = false; + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = + MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + if (Align > SrcAlign) + SrcAlign = Align; + StringRef Str; + bool CopyFromStr = isMemSrcFromString(Src, Str); + bool isZeroStr = CopyFromStr && Str.empty(); + unsigned Limit = AlwaysInline ? 
~0U : TLI.getMaxStoresPerMemcpy(OptSize); + + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, + (DstAlignCanChange ? 0 : Align), + (isZeroStr ? 0 : SrcAlign), + false, false, CopyFromStr, true, DAG, TLI)) + return SDValue(); + + if (DstAlignCanChange) { + Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Align && + TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign)) + NewAlign /= 2; + + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + uint64_t SrcOff = 0, DstOff = 0; + for (unsigned i = 0; i != NumMemOps; ++i) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + if (VTSize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(i == NumMemOps-1 && i != 0); + SrcOff -= VTSize - Size; + DstOff -= VTSize - Size; + } + + if (CopyFromStr && + (isZeroStr || (VT.isInteger() && !VT.isVector()))) { + // It's unlikely a store of a vector immediate can be done in a single + // instruction. It would require a load from a constantpool first. + // We only handle zero vectors here. + // FIXME: Handle other cases where store of vector immediate is done in + // a single instruction. + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); + if (Value.getNode()) + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstPtrInfo.getWithOffset(DstOff), isVol, + false, Align); + } + + if (!Store.getNode()) { + // The type might not be legal for the target. This should only happen + // if the type is smaller than a legal type, as on PPC, so the right + // thing to do is generate a LoadExt/StoreTrunc pair. These simplify + // to Load/Store if NVT==VT. + // FIXME does the case above also need this? + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + assert(NVT.bitsGE(VT)); + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, + MinAlign(SrcAlign, SrcOff)); + Store = DAG.getTruncStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstPtrInfo.getWithOffset(DstOff), VT, isVol, + false, Align); + } + OutChains.push_back(Store); + SrcOff += VTSize; + DstOff += VTSize; + Size -= VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool isVol, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Turn a memmove of undef to nop. + if (Src.getOpcode() == ISD::UNDEF) + return Chain; + + // Expand memmove to a series of load and store ops if the size operand falls + // below a certain threshold. 
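// [Aside -- illustration, not LLVM source] Unlike the memcpy expansion
// above, which interleaves each load with its store, the memmove
// expansion below issues all of the loads first and only then the
// stores. That is what preserves memmove semantics for overlapping
// ranges: every source byte is read before any destination byte is
// written.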
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::vector<EVT> MemOps; + bool DstAlignCanChange = false; + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + if (Align > SrcAlign) + SrcAlign = Align; + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); + + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, + (DstAlignCanChange ? 0 : Align), SrcAlign, + false, false, false, false, DAG, TLI)) + return SDValue(); + + if (DstAlignCanChange) { + Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + + uint64_t SrcOff = 0, DstOff = 0; + SmallVector<SDValue, 8> LoadValues; + SmallVector<SDValue, 8> LoadChains; + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + for (unsigned i = 0; i < NumMemOps; i++) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + Value = DAG.getLoad(VT, dl, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcPtrInfo.getWithOffset(SrcOff), isVol, + false, false, SrcAlign); + LoadValues.push_back(Value); + LoadChains.push_back(Value.getValue(1)); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + OutChains.clear(); + for (unsigned i = 0; i < NumMemOps; i++) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + Store = DAG.getStore(Chain, dl, LoadValues[i], + getMemBasePlusOffset(Dst, DstOff, DAG), + DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); + OutChains.push_back(Store); + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool isVol, + MachinePointerInfo DstPtrInfo) { + // Turn a memset of undef to nop. + if (Src.getOpcode() == ISD::UNDEF) + return Chain; + + // Expand memset to a series of load/store ops if the size operand + // falls below a certain threshold. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::vector<EVT> MemOps; + bool DstAlignCanChange = false; + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + bool IsZeroVal = + isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); + if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), + Size, (DstAlignCanChange ? 
0 : Align), 0, + true, IsZeroVal, false, true, DAG, TLI)) + return SDValue(); + + if (DstAlignCanChange) { + Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + + SmallVector<SDValue, 8> OutChains; + uint64_t DstOff = 0; + unsigned NumMemOps = MemOps.size(); + + // Find the largest store and generate the bit pattern for it. + EVT LargestVT = MemOps[0]; + for (unsigned i = 1; i < NumMemOps; i++) + if (MemOps[i].bitsGT(LargestVT)) + LargestVT = MemOps[i]; + SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); + + for (unsigned i = 0; i < NumMemOps; i++) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + if (VTSize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(i == NumMemOps-1 && i != 0); + DstOff -= VTSize - Size; + } + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. + SDValue Value = MemSetValue; + if (VT.bitsLT(LargestVT)) { + if (!LargestVT.isVector() && !VT.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); + else + Value = getMemsetValue(Src, VT, DAG, dl); + } + assert(Value.getValueType() == VT && "Value with wrong type."); + SDValue Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstPtrInfo.getWithOffset(DstOff), + isVol, false, Align); + OutChains.push_back(Store); + DstOff += VT.getSizeInBits() / 8; + Size -= VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVol, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); + + // Check to see if we should lower the memcpy to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memcpy with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(),Align, + isVol, false, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memcpy with target-specific + // code. If the target chooses to do this, this is the next best. + SDValue Result = + TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, + isVol, AlwaysInline, + DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. 
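// [Aside -- illustration, not LLVM source] getMemcpy is a three-step
// ladder: (1) small constant-size copies were expanded inline above,
// (2) the target was offered the job via EmitTargetCodeForMemcpy, and
// (3) the code below either forces the inline expansion (AlwaysInline)
// or falls back to a libc memcpy call with dst, src and size marshalled
// as intptr-typed arguments.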
+ if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, isVol, + true, DstPtrInfo, SrcPtrInfo); + } + + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc + // memcpy is not guaranteed to be safe. libc memcpys aren't required to + // respect volatile, so they may do things like read or write memory + // beyond the given memory regions. But fixing this isn't easy, and most + // people don't care. + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in DebugLoc + TargetLowering:: + CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), + TLI.getPointerTy()), + Args, *this, dl); + std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + + return CallResult.second; +} + +SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVol, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); + + // Check to see if we should lower the memmove to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memmove with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, isVol, + false, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memmove with target-specific + // code. If the target chooses to do this, this is the next best. + SDValue Result = + TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, + DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may + // not be safe. See memcpy above for more details. + + // Emit a library call. 
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Align, isVol, DstPtrInfo);
+
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDValue Result =
+ TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+
+ // Emit a library call.
+ Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType().bitsGT(MVT::i32))
+ Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ else
+ Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
+ Entry.Node = Src;
+ Entry.Ty = Type::getInt32Ty(*getContext());
+ Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Entry.isSExt = false;
+ Args.push_back(Entry);
+ // FIXME: pass in DebugLoc
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
+ false, false, false, false, 0,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET),
+ /*isTailCall=*/false,
+ /*doesNotReturn*/false, /*isReturnValueUsed=*/false,
+ getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()),
+ Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachinePointerInfo PtrInfo,
+ unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+
+ // All atomics are load and store, except for ATOMIC_LOAD and ATOMIC_STORE.
+ // For now, atomics are considered to be volatile always.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic + // orderings in the memoperand. + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO, + Ordering, SynchScope); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Cmp, + SDValue Swp, MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + + EVT VT = Cmp.getValueType(); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, Cmp, Swp, MMO, Ordering, + SynchScope); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + const Value* PtrVal, + unsigned Alignment, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + // An atomic store does not load. An atomic load does not store. + // (An atomicrmw obviously both loads and stores.) + // For now, atomics are considered to be volatile always, and they are + // chained as such. + // FIXME: Volatile isn't really correct; we should keep track of atomic + // orderings in the memoperand. + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO, + Ordering, SynchScope); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + assert((Opcode == ISD::ATOMIC_LOAD_ADD || + Opcode == ISD::ATOMIC_LOAD_SUB || + Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_OR || + Opcode == ISD::ATOMIC_LOAD_XOR || + Opcode == ISD::ATOMIC_LOAD_NAND || + Opcode == ISD::ATOMIC_LOAD_MIN || + Opcode == ISD::ATOMIC_LOAD_MAX || + Opcode == ISD::ATOMIC_LOAD_UMIN || + Opcode == ISD::ATOMIC_LOAD_UMAX || + Opcode == ISD::ATOMIC_SWAP || + Opcode == ISD::ATOMIC_STORE) && + "Invalid Atomic Op"); + + EVT VT = Val.getValueType(); + + SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? 
getVTList(MVT::Other) : + getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr, Val}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, Val, MMO, + Ordering, SynchScope); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + EVT VT, SDValue Chain, + SDValue Ptr, + const Value* PtrVal, + unsigned Alignment, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + // An atomic store does not load. An atomic load does not store. + // (An atomicrmw obviously both loads and stores.) + // For now, atomics are considered to be volatile always, and they are + // chained as such. + // FIXME: Volatile isn't really correct; we should keep track of atomic + // orderings in the memoperand. + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, VT, Chain, Ptr, MMO, + Ordering, SynchScope); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + EVT VT, SDValue Chain, + SDValue Ptr, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, MMO, Ordering, SynchScope); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// getMergeValues - Create a MERGE_VALUES node from the given operands. 
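+/// For illustration: merging two operands of types i32 and f32 yields one
+/// MERGE_VALUES node with the value type list (i32, f32), while a single
+/// operand is simply returned unchanged, with no node created.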
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, + DebugLoc dl) { + if (NumOps == 1) + return Ops[0]; + + SmallVector<EVT, 4> VTs; + VTs.reserve(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + VTs.push_back(Ops[i].getValueType()); + return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps), + Ops, NumOps); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, + const EVT *VTs, unsigned NumVTs, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachinePointerInfo PtrInfo, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, + MemVT, PtrInfo, Align, Vol, + ReadMem, WriteMem); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachinePointerInfo PtrInfo, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + if (Align == 0) // Ensure that codegen never sees alignment 0 + Align = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = 0; + if (WriteMem) + Flags |= MachineMemOperand::MOStore; + if (ReadMem) + Flags |= MachineMemOperand::MOLoad; + if (Vol) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align); + + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachineMemOperand *MMO) { + assert((Opcode == ISD::INTRINSIC_VOID || + Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::PREFETCH || + Opcode == ISD::LIFETIME_START || + Opcode == ISD::LIFETIME_END || + (Opcode <= INT_MAX && + (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && + "Opcode is not a memory-accessing opcode!"); + + // Memoize the node unless it returns a flag. + MemIntrinsicSDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, + MemVT, MMO); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, + MemVT, MMO); + } + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { + // If this is FI+Offset, we can model it. + if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) + return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset); + + // If this is (FI+Offset1)+Offset2, we can model it. 
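+  // That is, a pointer of the form (add (FrameIndex fi), (Constant c)) is
+  // modeled as a fixed-stack MachinePointerInfo at offset Offset + c.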
+  if (Ptr.getOpcode() != ISD::ADD ||
+      !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+      !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+    return MachinePointerInfo();
+
+  int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+  return MachinePointerInfo::getFixedStack(FI, Offset+
+                 cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
+
+/// InferPointerInfo - Overload of the above that takes the offset as an
+/// SDValue rather than an integer. A constant offset is folded in and an
+/// UNDEF offset is treated as zero; any other offset defeats the inference.
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+  // A constant offset can be modeled; UNDEF means "no offset"; anything
+  // else can't be handled.
+  if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+    return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+  if (OffsetOp.getOpcode() == ISD::UNDEF)
+    return InferPointerInfo(Ptr);
+  return MachinePointerInfo();
+}
+
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+                      EVT VT, DebugLoc dl, SDValue Chain,
+                      SDValue Ptr, SDValue Offset,
+                      MachinePointerInfo PtrInfo, EVT MemVT,
+                      bool isVolatile, bool isNonTemporal, bool isInvariant,
+                      unsigned Alignment, const MDNode *TBAAInfo,
+                      const MDNode *Ranges) {
+  assert(Chain.getValueType() == MVT::Other &&
+        "Invalid chain type");
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(VT);
+
+  unsigned Flags = MachineMemOperand::MOLoad;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
+  if (isInvariant)
+    Flags |= MachineMemOperand::MOInvariant;
+
+  // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+  // clients.
+  if (PtrInfo.V == 0)
+    PtrInfo = InferPointerInfo(Ptr, Offset);
+
+  MachineFunction &MF = getMachineFunction();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+                            TBAAInfo, Ranges);
+  return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+                      EVT VT, DebugLoc dl, SDValue Chain,
+                      SDValue Ptr, SDValue Offset, EVT MemVT,
+                      MachineMemOperand *MMO) {
+  if (VT == MemVT) {
+    ExtType = ISD::NON_EXTLOAD;
+  } else if (ExtType == ISD::NON_EXTLOAD) {
+    assert(VT == MemVT && "Non-extending load from different memory type!");
+  } else {
+    // Extending load.
+    assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+           "Should only be an extending load, not truncating!");
+    assert(VT.isInteger() == MemVT.isInteger() &&
+           "Cannot convert from FP to Int or Int to FP!");
+    assert(VT.isVector() == MemVT.isVector() &&
+           "Cannot use an extending load to convert to or from a vector!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+           "Cannot use an extending load to change the number of vector "
+           "elements!");
+  }
+
+  bool Indexed = AM != ISD::UNINDEXED;
+  assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+         "Unindexed load with an offset!");
+
+  SDVTList VTs = Indexed ?
+    getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+                                     MMO->isNonTemporal(),
+                                     MMO->isInvariant()));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<LoadSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType,
+                                             MemVT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
+                              SDValue Chain, SDValue Ptr,
+                              MachinePointerInfo PtrInfo,
+                              bool isVolatile, bool isNonTemporal,
+                              bool isInvariant, unsigned Alignment,
+                              const MDNode *TBAAInfo,
+                              const MDNode *Ranges) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+                 PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment,
+                 TBAAInfo, Ranges);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+                                 SDValue Chain, SDValue Ptr,
+                                 MachinePointerInfo PtrInfo, EVT MemVT,
+                                 bool isVolatile, bool isNonTemporal,
+                                 unsigned Alignment, const MDNode *TBAAInfo) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+                 PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment,
+                 TBAAInfo);
+}
+
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+                             SDValue Offset, ISD::MemIndexedMode AM) {
+  LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+  assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+         "Load is already an indexed load!");
+  return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+                 LD->getChain(), Base, Offset, LD->getPointerInfo(),
+                 LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
+                 false, LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                               SDValue Ptr, MachinePointerInfo PtrInfo,
+                               bool isVolatile, bool isNonTemporal,
+                               unsigned Alignment, const MDNode *TBAAInfo) {
+  assert(Chain.getValueType() == MVT::Other &&
+        "Invalid chain type");
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(Val.getValueType());
+
+  unsigned Flags = MachineMemOperand::MOStore;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
+
+  if (PtrInfo.V == 0)
+    PtrInfo = InferPointerInfo(Ptr);
+
+  MachineFunction &MF = getMachineFunction();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(PtrInfo, Flags,
+                            Val.getValueType().getStoreSize(), Alignment,
+                            TBAAInfo);
+
+  return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                               SDValue Ptr, MachineMemOperand *MMO) {
+  assert(Chain.getValueType() == MVT::Other &&
+        "Invalid chain type");
+  EVT VT = Val.getValueType();
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+                                     MMO->isNonTemporal(), MMO->isInvariant()));
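+  // The address space of the memory operand is folded into the CSE key
+  // below, so otherwise-identical stores into different address spaces
+  // remain distinct nodes.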
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<StoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+                                              false, VT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                                    SDValue Ptr, MachinePointerInfo PtrInfo,
+                                    EVT SVT, bool isVolatile,
+                                    bool isNonTemporal, unsigned Alignment,
+                                    const MDNode *TBAAInfo) {
+  assert(Chain.getValueType() == MVT::Other &&
+        "Invalid chain type");
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(SVT);
+
+  unsigned Flags = MachineMemOperand::MOStore;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
+
+  if (PtrInfo.V == 0)
+    PtrInfo = InferPointerInfo(Ptr);
+
+  MachineFunction &MF = getMachineFunction();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+                            TBAAInfo);
+
+  return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                                    SDValue Ptr, EVT SVT,
+                                    MachineMemOperand *MMO) {
+  EVT VT = Val.getValueType();
+
+  assert(Chain.getValueType() == MVT::Other &&
+        "Invalid chain type");
+  if (VT == SVT)
+    return getStore(Chain, dl, Val, Ptr, MMO);
+
+  assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+         "Should only be a truncating store, not extending!");
+  assert(VT.isInteger() == SVT.isInteger() &&
+         "Can't do FP-INT conversion!");
+  assert(VT.isVector() == SVT.isVector() &&
+         "Cannot use trunc store to convert to or from a vector!");
+  assert((!VT.isVector() ||
+          VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+         "Cannot use trunc store to change the number of vector elements!");
+
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(SVT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+                                     MMO->isNonTemporal(), MMO->isInvariant()));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<StoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+                                              true, SVT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+                              SDValue Offset, ISD::MemIndexedMode AM) {
+  StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+  assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+         "Store is already an indexed store!");
+  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+  SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ST->getMemoryVT().getRawBits());
+  ID.AddInteger(ST->getRawSubclassData());
+  ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM,
+                                              ST->isTruncatingStore(),
+                                              ST->getMemoryVT(),
+ ST->getMemOperand()); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, + SDValue Chain, SDValue Ptr, + SDValue SV, + unsigned Align) { + SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + const SDUse *Ops, unsigned NumOps) { + switch (NumOps) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + // Copy from an SDUse array into an SDValue array for use with + // the regular getNode logic. + SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps); + return getNode(Opcode, DL, VT, &NewOps[0], NumOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + const SDValue *Ops, unsigned NumOps) { + switch (NumOps) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + switch (Opcode) { + default: break; + case ISD::SELECT_CC: { + assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); + assert(Ops[0].getValueType() == Ops[1].getValueType() && + "LHS and RHS of condition must have same type!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "True and False arms of SelectCC must have same type!"); + assert(Ops[2].getValueType() == VT && + "select_cc node must be of same type as true and false value!"); + break; + } + case ISD::BR_CC: { + assert(NumOps == 5 && "BR_CC takes 5 operands!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "LHS/RHS of comparison should match types!"); + break; + } + } + + // Memoize nodes. 
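+  // Nodes are uniqued on (opcode, value type, operands) via CSEMap, so,
+  // e.g., requesting the same (ISD::ADD, i32, X, Y) twice hands back the
+  // same SDNode rather than building a duplicate.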
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+
+  if (VT != MVT::Glue) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+    void *IP = 0;
+
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifySDNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              ArrayRef<EVT> ResultTys,
+                              const SDValue *Ops, unsigned NumOps) {
+  return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+                 Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              const EVT *VTs, unsigned NumVTs,
+                              const SDValue *Ops, unsigned NumOps) {
+  if (NumVTs == 1)
+    return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+  return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+                              const SDValue *Ops, unsigned NumOps) {
+  if (VTList.NumVTs == 1)
+    return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+#if 0
+  switch (Opcode) {
+  // FIXME: figure out how to safely handle things like
+  // int foo(int x) { return 1 << (x & 255); }
+  // int bar() { return foo(256); }
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS:
+  case ISD::SHL_PARTS:
+    if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+      return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+    else if (N3.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+        // eliminate the and.
+        unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+        if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+          return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+      }
+    break;
+  }
+#endif
+
+  // Memoize the node unless it returns a flag.
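+  // A glue (MVT::Glue) result welds a node to its consumer, so two
+  // structurally identical glue-producing nodes are not interchangeable
+  // and are deliberately kept out of the CSE map.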
+ SDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + if (NumOps == 1) { + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], + Ops[2]); + } else { + N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + CSEMap.InsertNode(N, IP); + } else { + if (NumOps == 1) { + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], + Ops[2]); + } else { + N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + } + AllNodes.push_back(N); +#ifndef NDEBUG + VerifySDNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) { + return getNode(Opcode, DL, VTList, 0, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1) { + SDValue Ops[] = { N1 }; + return getNode(Opcode, DL, VTList, Ops, 1); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2) { + SDValue Ops[] = { N1, N2 }; + return getNode(Opcode, DL, VTList, Ops, 2); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3) { + SDValue Ops[] = { N1, N2, N3 }; + return getNode(Opcode, DL, VTList, Ops, 3); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VTList, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VTList, Ops, 5); +} + +SDVTList SelectionDAG::getVTList(EVT VT) { + return makeVTList(SDNode::getValueTypeList(VT), 1); +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) + return *I; + + EVT *Array = Allocator.Allocate<EVT>(2); + Array[0] = VT1; + Array[1] = VT2; + SDVTList Result = makeVTList(Array, 2); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && + I->VTs[2] == VT3) + return *I; + + EVT *Array = Allocator.Allocate<EVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + SDVTList Result = makeVTList(Array, 3); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && + I->VTs[2] == VT3 && I->VTs[3] == VT4) + return *I; + + 
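+  // Not found: allocate a permanent copy. SDVTList only stores a pointer,
+  // so the EVT array must live in the DAG's allocator for as long as any
+  // node refers to it.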
EVT *Array = Allocator.Allocate<EVT>(4); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Array[3] = VT4; + SDVTList Result = makeVTList(Array, 4); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { + switch (NumVTs) { + case 0: llvm_unreachable("Cannot have nodes without results!"); + case 1: return getVTList(VTs[0]); + case 2: return getVTList(VTs[0], VTs[1]); + case 3: return getVTList(VTs[0], VTs[1], VTs[2]); + case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]); + default: break; + } + + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) { + if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) + continue; + + if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2])) + return *I; + } + + EVT *Array = Allocator.Allocate<EVT>(NumVTs); + std::copy(VTs, VTs+NumVTs, Array); + SDVTList Result = makeVTList(Array, NumVTs); + VTList.push_back(Result); + return Result; +} + + +/// UpdateNodeOperands - *Mutate* the specified node in-place to have the +/// specified operands. If the resultant node already exists in the DAG, +/// this does not modify the specified node, instead it returns the node that +/// already exists. If the resultant node does not exist in the DAG, the +/// input node is returned. As a degenerate case, if you specify the same +/// input operands as the node already has, the input node is returned. +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { + assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); + + // Check to see if there is no change. + if (Op == N->getOperand(0)) return N; + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + N->OperandList[0].set(Op); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { + assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); + + // Check to see if there is no change. + if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) + return N; // No operands changed, just return the input node. + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + if (N->OperandList[0] != Op1) + N->OperandList[0].set(Op1); + if (N->OperandList[1] != Op2) + N->OperandList[1].set(Op2); + + // If this gets put into a CSE map, add it. 
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { + SDValue Ops[] = { Op1, Op2, Op3 }; + return UpdateNodeOperands(N, Ops, 3); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return UpdateNodeOperands(N, Ops, 4); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4, SDValue Op5) { + SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; + return UpdateNodeOperands(N, Ops, 5); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { + assert(N->getNumOperands() == NumOps && + "Update with wrong number of operands"); + + // Check to see if there is no change. + bool AnyChange = false; + for (unsigned i = 0; i != NumOps; ++i) { + if (Ops[i] != N->getOperand(i)) { + AnyChange = true; + break; + } + } + + // No operands changed, just return the input node. + if (!AnyChange) return N; + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + for (unsigned i = 0; i != NumOps; ++i) + if (N->OperandList[i] != Ops[i]) + N->OperandList[i].set(Ops[i]); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +/// DropOperands - Release the operands and set this node to have +/// zero operands. +void SDNode::DropOperands() { + // Unlike the code in MorphNodeTo that does this, we don't need to + // watch for dead nodes here. + for (op_iterator I = op_begin(), E = op_end(); I != E; ) { + SDUse &Use = *I++; + Use.set(SDValue()); + } +} + +/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a +/// machine opcode. 
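+/// The opcode is handed to MorphNodeTo bitwise-complemented (~MachineOpc);
+/// machine opcodes are stored on nodes in this complemented form to keep
+/// them distinct from ISD opcodes (getMachineNode below does the same).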
+///
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT) {
+  SDVTList VTs = getVTList(VT);
+  return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT, SDValue Op1) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT, SDValue Op1,
+                                   SDValue Op2) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1, Op2 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT, SDValue Op1,
+                                   SDValue Op2, SDValue Op3) {
+  SDVTList VTs = getVTList(VT);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT, const SDValue *Ops,
+                                   unsigned NumOps) {
+  SDVTList VTs = getVTList(VT);
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2, const SDValue *Ops,
+                                   unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2, EVT VT3,
+                                   const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3);
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2, EVT VT3, EVT VT4,
+                                   const SDValue *Ops, unsigned NumOps) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2,
+                                   SDValue Op1) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2,
+                                   SDValue Op1, SDValue Op2) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1, Op2 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2,
+                                   SDValue Op1, SDValue Op2,
+                                   SDValue Op3) {
+  SDVTList VTs = getVTList(VT1, VT2);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   EVT VT1, EVT VT2, EVT VT3,
+                                   SDValue Op1, SDValue Op2,
+                                   SDValue Op3) {
+  SDVTList VTs = getVTList(VT1, VT2, VT3);
+  SDValue Ops[] = { Op1, Op2, Op3 };
+  return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+                                   SDVTList VTs, const SDValue *Ops,
+                                   unsigned NumOps) {
+  N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+  // Reset the NodeID to -1.
+  N->setNodeId(-1);
+  return N;
+}
+
+/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the fact that the operation is associated with multiple lines.
+/// This makes the debugger work better at -O0, where there is a higher
+/// probability of having other instructions associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+  DebugLoc NLoc = N->getDebugLoc();
+  if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) {
+    N->setDebugLoc(DebugLoc());
+  }
+  return N;
+}
+
+/// MorphNodeTo - This *mutates* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node.  If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.  Note that the DebugLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  SDVTList VTs, const SDValue *Ops,
+                                  unsigned NumOps) {
+  // If an identical node already exists, use it.
+  void *IP = 0;
+  if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+    if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
+  }
+
+  if (!RemoveNodeFromCSEMaps(N))
+    IP = 0;
+
+  // Start the morphing.
+  N->NodeType = Opc;
+  N->ValueList = VTs.VTs;
+  N->NumValues = VTs.NumVTs;
+
+  // Clear the operands list, updating used nodes to remove this from their
+  // use list.  Keep track of any operands that become dead as a result.
+  SmallPtrSet<SDNode*, 16> DeadNodeSet;
+  for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+    SDUse &Use = *I++;
+    SDNode *Used = Use.getNode();
+    Use.set(SDValue());
+    if (Used->use_empty())
+      DeadNodeSet.insert(Used);
+  }
+
+  if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
+    // Initialize the memory references information.
+    MN->setMemRefs(0, 0);
+    // If NumOps is larger than the # of operands we can have in a
+    // MachineSDNode, reallocate the operand list.
+    if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
+      if (MN->OperandsNeedDelete)
+        delete[] MN->OperandList;
+      if (NumOps > array_lengthof(MN->LocalOperands))
+        // We're creating a final node that will live unmorphed for the
+        // remainder of the current SelectionDAG iteration, so we can allocate
+        // the operands directly out of a pool with no recycling metadata.
+        MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+                         Ops, NumOps);
+      else
+        MN->InitOperands(MN->LocalOperands, Ops, NumOps);
+      MN->OperandsNeedDelete = false;
+    } else
+      MN->InitOperands(MN->OperandList, Ops, NumOps);
+  } else {
+    // If NumOps is larger than the # of operands we currently have, reallocate
+    // the operand list.
+    if (NumOps > N->NumOperands) {
+      if (N->OperandsNeedDelete)
+        delete[] N->OperandList;
+      N->InitOperands(new SDUse[NumOps], Ops, NumOps);
+      N->OperandsNeedDelete = true;
+    } else
+      N->InitOperands(N->OperandList, Ops, NumOps);
+  }
+
+  // Delete any nodes that are still dead after adding the uses for the
+  // new operands.
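+  // (A node collected in DeadNodeSet may have been resurrected if the new
+  // operand list refers to it again, hence the use_empty() recheck below.)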
+ if (!DeadNodeSet.empty()) { + SmallVector<SDNode *, 16> DeadNodes; + for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), + E = DeadNodeSet.end(); I != E; ++I) + if ((*I)->use_empty()) + DeadNodes.push_back(*I); + RemoveDeadNodes(DeadNodes); + } + + if (IP) + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + + +/// getMachineNode - These are used for target selectors to create a new node +/// with specified return type(s), MachineInstr opcode, and operands. +/// +/// Note that getMachineNode returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, 0, 0); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return getMachineNode(Opcode, dl, VTs, 0, 0); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { 
Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, + EVT VT2, EVT VT3, EVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + ArrayRef<EVT> ResultTys, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, + const SDValue *Ops, unsigned NumOps) { + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; + MachineSDNode *N; + void *IP = 0; + + if (DoCSE) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); + IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL)); + } + } + + // Allocate a new MachineSDNode. + N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs); + + // Initialize the operands list. + if (NumOps > array_lengthof(N->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), + Ops, NumOps); + else + N->InitOperands(N->LocalOperands, Ops, NumOps); + N->OperandsNeedDelete = false; + + if (DoCSE) + CSEMap.InsertNode(N, IP); + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyMachineNode(N); +#endif + return N; +} + +/// getTargetExtractSubreg - A convenience function for creating +/// TargetOpcode::EXTRACT_SUBREG nodes. +SDValue +SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + VT, Operand, SRIdxVal); + return SDValue(Subreg, 0); +} + +/// getTargetInsertSubreg - A convenience function for creating +/// TargetOpcode::INSERT_SUBREG nodes. +SDValue +SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand, SDValue Subreg) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + VT, Operand, Subreg, SRIdxVal); + return SDValue(Result, 0); +} + +/// getNodeIfExists - Get the specified node if it's already available, or +/// else return NULL. +SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, + const SDValue *Ops, unsigned NumOps) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return E; + } + return NULL; +} + +/// getDbgValue - Creates a SDDbgValue node. 
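+/// Three overloads follow: the described value may live in an SDNode
+/// result (N, R), in a constant (C), or in a frame index slot (FI); each
+/// carries the variable's MDNode, an offset, a DebugLoc, and an order.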
+///
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
+                          DebugLoc DL, unsigned O) {
+  return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
+                          DebugLoc DL, unsigned O) {
+  return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
+                          DebugLoc DL, unsigned O) {
+  return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O);
+}
+
+namespace {
+
+/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
+/// pointed to by a use iterator is deleted, increment the use iterator
+/// so that it doesn't dangle.
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+  SDNode::use_iterator &UI;
+  SDNode::use_iterator &UE;
+
+  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+    // Increment the iterator as needed.
+    while (UI != UE && N == *UI)
+      ++UI;
+  }
+
+public:
+  RAUWUpdateListener(SelectionDAG &d,
+                     SDNode::use_iterator &ui,
+                     SDNode::use_iterator &ue)
+    : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
+};
+
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
+  SDNode *From = FromN.getNode();
+  assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+         "Cannot replace with this method!");
+  assert(From != To.getNode() && "Cannot replace uses of a value with itself");
+
+  // Iterate over all the existing uses of From. New uses will be added
+  // to the beginning of the use list, which we avoid visiting.
+  // This specifically avoids visiting uses of From that arise while the
+  // replacement is happening, because any such uses would be the result
+  // of CSE: If an existing node looks like From after one of its operands
+  // is replaced by To, we don't want to replace all of its users with To
+  // too. See PR3018 for more info.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(*this, UI, UE);
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      ++UI;
+      Use.set(To);
+    } while (UI != UE && *UI == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User);
+  }
+
+  // If we just RAUW'd the root, take note.
+  if (FromN == getRoot())
+    setRoot(To);
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
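+/// (The NDEBUG-only loop below asserts exactly this invariant for every
+/// result value of From that actually has uses.)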
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
+#ifndef NDEBUG
+  for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+    assert((!From->hasAnyUseOfValue(i) ||
+            From->getValueType(i) == To->getValueType(i)) &&
+           "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+  // Handle the trivial case.
+  if (From == To)
+    return;
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(*this, UI, UE);
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      ++UI;
+      Use.setNode(To);
+    } while (UI != UE && *UI == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User);
+  }
+
+  // If we just RAUW'd the root, take note.
+  if (From == getRoot().getNode())
+    setRoot(SDValue(To, getRoot().getResNo()));
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values.  To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
+  if (From->getNumValues() == 1)  // Handle the simple case efficiently.
+    return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+  RAUWUpdateListener Listener(*this, UI, UE);
+  while (UI != UE) {
+    SDNode *User = *UI;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+      const SDValue &ToOp = To[Use.getResNo()];
+      ++UI;
+      Use.set(ToOp);
+    } while (UI != UE && *UI == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User);
+  }
+
+  // If we just RAUW'd the root, take note.
+  if (From == getRoot().getNode())
+    setRoot(SDValue(To[getRoot().getResNo()]));
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To) {
+  // Handle the really simple, really trivial case efficiently.
+  if (From == To) return;
+
+  // Handle the simple, trivial case efficiently.
+  if (From.getNode()->getNumValues() == 1) {
+    ReplaceAllUsesWith(From, To);
+    return;
+  }
+
+  // Iterate over just the existing users of From. See the comments in
+  // the ReplaceAllUsesWith above.
+  SDNode::use_iterator UI = From.getNode()->use_begin(),
+                       UE = From.getNode()->use_end();
+  RAUWUpdateListener Listener(*this, UI, UE);
+  while (UI != UE) {
+    SDNode *User = *UI;
+    bool UserRemovedFromCSEMaps = false;
+
+    // A user can appear in a use list multiple times, and when this
+    // happens the uses are usually next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      SDUse &Use = UI.getUse();
+
+      // Skip uses of different values from the same node.
+      if (Use.getResNo() != From.getResNo()) {
+        ++UI;
+        continue;
+      }
+
+      // If this node hasn't been modified yet, it's still in the CSE maps,
+      // so remove its old self from the CSE maps.
+      if (!UserRemovedFromCSEMaps) {
+        RemoveNodeFromCSEMaps(User);
+        UserRemovedFromCSEMaps = true;
+      }
+
+      ++UI;
+      Use.set(To);
+    } while (UI != UE && *UI == User);
+
+    // We are iterating over all uses of the From node, so if a use
+    // doesn't use the specific value, no changes are made.
+    if (!UserRemovedFromCSEMaps)
+      continue;
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User);
+  }
+
+  // If we just RAUW'd the root, take note.
+  if (From == getRoot())
+    setRoot(To);
+}
+
+namespace {
+  /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+  /// to record information about a use.
+  struct UseMemo {
+    SDNode *User;
+    unsigned Index;
+    SDUse *Use;
+  };
+
+  /// operator< - Sort Memos by User.
+  bool operator<(const UseMemo &L, const UseMemo &R) {
+    return (intptr_t)L.User < (intptr_t)R.User;
+  }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone.  The same value
+/// may appear in both the From and To list.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+                                              const SDValue *To,
+                                              unsigned Num) {
+  // Handle the simple, trivial case efficiently.
+  if (Num == 1)
+    return ReplaceAllUsesOfValueWith(*From, *To);
+
+  // Read up all the uses and make records of them. This helps
+  // processing new uses that are introduced during the
+  // replacement process.
+  SmallVector<UseMemo, 4> Uses;
+  for (unsigned i = 0; i != Num; ++i) {
+    unsigned FromResNo = From[i].getResNo();
+    SDNode *FromNode = From[i].getNode();
+    for (SDNode::use_iterator UI = FromNode->use_begin(),
+         E = FromNode->use_end(); UI != E; ++UI) {
+      SDUse &Use = UI.getUse();
+      if (Use.getResNo() == FromResNo) {
+        UseMemo Memo = { *UI, i, &Use };
+        Uses.push_back(Memo);
+      }
+    }
+  }
+
+  // Sort the uses, so that all the uses from a given User are together.
+  std::sort(Uses.begin(), Uses.end());
+
+  for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+       UseIndex != UseIndexEnd; ) {
+    // We know that this user uses some value of From.  If it is the right
+    // value, update it.
+    SDNode *User = Uses[UseIndex].User;
+
+    // This node is about to morph, remove its old self from the CSE maps.
+    RemoveNodeFromCSEMaps(User);
+
+    // The Uses array is sorted, so all the uses for a given User
+    // are next to each other in the list.
+    // To help reduce the number of CSE recomputations, process all
+    // the uses of this user that we can find this way.
+    do {
+      unsigned i = Uses[UseIndex].Index;
+      SDUse &Use = *Uses[UseIndex].Use;
+      ++UseIndex;
+
+      Use.set(To[i]);
+    } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+    // Now that we have modified User, add it back to the CSE maps.  If it
+    // already exists there, recursively merge the results together.
+    AddModifiedNodeToCSEMaps(User);
+  }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the maximum id and a vector
+/// of the SDNodes* in assigned order by reference.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+
+  unsigned DAGSize = 0;
+
+  // SortedPos tracks the progress of the algorithm. Nodes before it are
+  // sorted, nodes after it are unsorted. When the algorithm completes
+  // it is at the end of the list.
+  allnodes_iterator SortedPos = allnodes_begin();
+
+  // Visit all the nodes. Move nodes with no operands to the front of
+  // the list immediately. Annotate nodes that do have operands with their
+  // operand count. Before we do this, the Node Id fields of the nodes
+  // may contain arbitrary values. After, the Node Id fields for nodes
+  // before SortedPos will contain the topological sort index, and the
+  // Node Id fields for nodes at SortedPos and after will contain the
+  // count of outstanding operands.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ) {
+    SDNode *N = I++;
+    checkForCycles(N);
+    unsigned Degree = N->getNumOperands();
+    if (Degree == 0) {
+      // A node with no operands, add it to the result array immediately.
+      N->setNodeId(DAGSize++);
+      allnodes_iterator Q = N;
+      if (Q != SortedPos)
+        SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+      assert(SortedPos != AllNodes.end() && "Overran node list");
+      ++SortedPos;
+    } else {
+      // Temporarily use the Node Id as scratch space for the degree count.
+      N->setNodeId(Degree);
+    }
+  }
+
+  // Visit all the nodes. As we iterate, move nodes into sorted order,
+  // such that by the time the end is reached all nodes will be sorted.
+  for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) {
+    SDNode *N = I;
+    checkForCycles(N);
+    // N is in sorted position, so all its uses have one less operand
+    // that needs to be sorted.
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI) {
+      SDNode *P = *UI;
+      unsigned Degree = P->getNodeId();
+      assert(Degree != 0 && "Invalid node degree");
+      --Degree;
+      if (Degree == 0) {
+        // All of P's operands are sorted, so P may be sorted now.
+        P->setNodeId(DAGSize++);
+        if (P != SortedPos)
+          SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+        assert(SortedPos != AllNodes.end() && "Overran node list");
+        ++SortedPos;
+      } else {
+        // Update P's outstanding operand count.
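+        // The count lives in P's Node Id scratch field until it reaches
+        // zero, at which point P is given its final topological index.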
+        P->setNodeId(Degree);
+      }
+    }
+    if (I == SortedPos) {
+#ifndef NDEBUG
+      SDNode *S = ++I;
+      dbgs() << "Overran sorted position:\n";
+      S->dumprFull();
+#endif
+      llvm_unreachable(0);
+    }
+  }
+
+  assert(SortedPos == AllNodes.end() &&
+         "Topological sort incomplete!");
+  assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+         "First node in topological sort is not the entry token!");
+  assert(AllNodes.front().getNodeId() == 0 &&
+         "First node in topological sort has non-zero id!");
+  assert(AllNodes.front().getNumOperands() == 0 &&
+         "First node in topological sort has operands!");
+  assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+         "Last node in topological sort has unexpected id!");
+  assert(AllNodes.back().use_empty() &&
+         "Last node in topological sort has users!");
+  assert(DAGSize == allnodes_size() && "Node count mismatch!");
+  return DAGSize;
+}
+
+/// AssignOrdering - Assign an order to the SDNode.
+void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) {
+  assert(SD && "Trying to assign an order to a null node!");
+  Ordering->add(SD, Order);
+}
+
+/// GetOrdering - Get the order for the SDNode.
+unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
+  assert(SD && "Trying to get the order of a null node!");
+  return Ordering->getOrder(SD);
+}
+
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+  DbgInfo->add(DB, SD, isParameter);
+  if (SD)
+    SD->setHasDebugValue(true);
+}
+
+/// TransferDbgValues - Transfer SDDbgValues.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+  if (From == To || !From.getNode()->getHasDebugValue())
+    return;
+  SDNode *FromNode = From.getNode();
+  SDNode *ToNode = To.getNode();
+  ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
+  SmallVector<SDDbgValue *, 2> ClonedDVs;
+  for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
+       I != E; ++I) {
+    SDDbgValue *Dbg = *I;
+    if (Dbg->getKind() == SDDbgValue::SDNODE) {
+      SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+                                      Dbg->getOffset(), Dbg->getDebugLoc(),
+                                      Dbg->getOrder());
+      ClonedDVs.push_back(Clone);
+    }
+  }
+  for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(),
+         E = ClonedDVs.end(); I != E; ++I)
+    AddDbgValue(*I, ToNode, false);
+}
+
+//===----------------------------------------------------------------------===//
+//                              SDNode Class
+//===----------------------------------------------------------------------===//
+
+HandleSDNode::~HandleSDNode() {
+  DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL,
+                                         const GlobalValue *GA,
+                                         EVT VT, int64_t o, unsigned char TF)
+  : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+  TheGlobal = GA;
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
+                     MachineMemOperand *mmo)
+ : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+  SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+                                      MMO->isNonTemporal(), MMO->isInvariant());
+  assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+  assert(isNonTemporal() == MMO->isNonTemporal() &&
+         "Non-temporal encoding error!");
+  assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+}
+
+MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+                     const SDValue *Ops, unsigned NumOps, EVT memvt,
+                     MachineMemOperand *mmo)
+   : SDNode(Opc, dl, VTs, Ops, NumOps),
+     
MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), MMO->isInvariant()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); +} + +/// Profile - Gather unique data for the node. +/// +void SDNode::Profile(FoldingSetNodeID &ID) const { + AddNodeIDNode(ID, this); +} + +namespace { + struct EVTArray { + std::vector<EVT> VTs; + + EVTArray() { + VTs.reserve(MVT::LAST_VALUETYPE); + for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) + VTs.push_back(MVT((MVT::SimpleValueType)i)); + } + }; +} + +static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs; +static ManagedStatic<EVTArray> SimpleVTArray; +static ManagedStatic<sys::SmartMutex<true> > VTMutex; + +/// getValueTypeList - Return a pointer to the specified value type. +/// +const EVT *SDNode::getValueTypeList(EVT VT) { + if (VT.isExtended()) { + sys::SmartScopedLock<true> Lock(*VTMutex); + return &(*EVTs->insert(VT).first); + } else { + assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE && + "Value type out of range!"); + return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; + } +} + +/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the +/// indicated value. This method ignores uses of other values defined by this +/// operation. +bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + // TODO: Only iterate over uses of a given value of the node + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { + if (UI.getUse().getResNo() == Value) { + if (NUses == 0) + return false; + --NUses; + } + } + + // Found exactly the right number of uses? + return NUses == 0; +} + + +/// hasAnyUseOfValue - Return true if there are any use of the indicated +/// value. This method ignores uses of other values defined by this operation. +bool SDNode::hasAnyUseOfValue(unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) + if (UI.getUse().getResNo() == Value) + return true; + + return false; +} + + +/// isOnlyUserOf - Return true if this node is the only use of N. +/// +bool SDNode::isOnlyUserOf(SDNode *N) const { + bool Seen = false; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDNode *User = *I; + if (User == this) + Seen = true; + else + return false; + } + + return Seen; +} + +/// isOperand - Return true if this node is an operand of N. +/// +bool SDValue::isOperandOf(SDNode *N) const { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (*this == N->getOperand(i)) + return true; + return false; +} + +bool SDNode::isOperandOf(SDNode *N) const { + for (unsigned i = 0, e = N->NumOperands; i != e; ++i) + if (this == N->OperandList[i].getNode()) + return true; + return false; +} + +/// reachesChainWithoutSideEffects - Return true if this operand (which must +/// be a chain) reaches the specified operand without crossing any +/// side-effecting instructions on any chain path. In practice, this looks +/// through token factors and non-volatile loads. In order to remain efficient, +/// this only looks a couple of nodes in, it does not do an exhaustive search. 
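+/// (Editor's illustrative note, not part of the original source: a typical
+/// query, with Ld and OtherLd hypothetical load nodes, is
+///   Ld->getChain().reachesChainWithoutSideEffects(SDValue(OtherLd, 1))
+/// which succeeds only if the two chains meet through nothing but
+/// TokenFactors and non-volatile loads within the small depth limit.)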
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, + unsigned Depth) const { + if (*this == Dest) return true; + + // Don't search too deeply, we just want to be able to see through + // TokenFactor's etc. + if (Depth == 0) return false; + + // If this is a token factor, all inputs to the TF happen in parallel. If any + // of the operands of the TF does not reach dest, then we cannot do the xform. + if (getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) + return false; + return true; + } + + // Loads don't have side effects, look through them. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { + if (!Ld->isVolatile()) + return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); + } + return false; +} + +/// hasPredecessor - Return true if N is a predecessor of this node. +/// N is either an operand of this node, or can be reached by recursively +/// traversing up the operands. +/// NOTE: This is an expensive method. Use it carefully. +bool SDNode::hasPredecessor(const SDNode *N) const { + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + return hasPredecessorHelper(N, Visited, Worklist); +} + +bool SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSet<const SDNode *, 32> &Visited, + SmallVector<const SDNode *, 16> &Worklist) const { + if (Visited.empty()) { + Worklist.push_back(this); + } else { + // Take a look in the visited set. If we've already encountered this node + // we needn't search further. + if (Visited.count(N)) + return true; + } + + // Haven't visited N yet. Continue the search. + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + SDNode *Op = M->getOperand(i).getNode(); + if (Visited.insert(Op)) + Worklist.push_back(Op); + if (Op == N) + return true; + } + } + + return false; +} + +uint64_t SDNode::getConstantOperandVal(unsigned Num) const { + assert(Num < NumOperands && "Invalid child # of SDNode!"); + return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); +} + +SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { + assert(N->getNumValues() == 1 && + "Can't unroll a vector with multiple results!"); + + EVT VT = N->getValueType(0); + unsigned NE = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + + SmallVector<SDValue, 8> Scalars; + SmallVector<SDValue, 4> Operands(N->getNumOperands()); + + // If ResNE is 0, fully unroll the vector op. + if (ResNE == 0) + ResNE = NE; + else if (NE > ResNE) + NE = ResNE; + + unsigned i; + for (i= 0; i != NE; ++i) { + for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { + SDValue Operand = N->getOperand(j); + EVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + // A vector operand; extract a single element. + EVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperandEltVT, + Operand, + getConstant(i, TLI.getPointerTy())); + } else { + // A scalar operand; just use it as is. 
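+        // (Editor's illustrative note, not part of the original source: for
+        // a <4 x i32> ADD this loop emits four scalar i32 ADDs feeding one
+        // BUILD_VECTOR; if ResNE were 8, the four computed lanes would be
+        // padded out with UNDEFs below.)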
+ Operands[j] = Operand; + } + } + + switch (N->getOpcode()) { + default: + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + &Operands[0], Operands.size())); + break; + case ISD::VSELECT: + Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, + &Operands[0], Operands.size())); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); + break; + case ISD::SIGN_EXTEND_INREG: + case ISD::FP_ROUND_INREG: { + EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + Operands[0], + getValueType(ExtVT))); + } + } + } + + for (; i < ResNE; ++i) + Scalars.push_back(getUNDEF(EltVT)); + + return getNode(ISD::BUILD_VECTOR, dl, + EVT::getVectorVT(*getContext(), EltVT, ResNE), + &Scalars[0], Scalars.size()); +} + + +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load +/// is loading from. +bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, + unsigned Bytes, int Dist) const { + if (LD->getChain() != Base->getChain()) + return false; + EVT VT = LD->getValueType(0); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + SDValue Loc = LD->getOperand(1); + SDValue BaseLoc = Base->getOperand(1); + if (Loc.getOpcode() == ISD::FrameIndex) { + if (BaseLoc.getOpcode() != ISD::FrameIndex) + return false; + const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo(); + int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); + int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); + int FS = MFI->getObjectSize(FI); + int BFS = MFI->getObjectSize(BFI); + if (FS != BFS || FS != (int)Bytes) return false; + return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + } + + // Handle X+C + if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && + cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) + return true; + + const GlobalValue *GV1 = NULL; + const GlobalValue *GV2 = NULL; + int64_t Offset1 = 0; + int64_t Offset2 = 0; + bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + if (isGA1 && isGA2 && GV1 == GV2) + return Offset1 == (Offset2 + Dist*Bytes); + return false; +} + + +/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if +/// it cannot be inferred. +unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { + // If this is a GlobalAddress + cst, return the alignment. + const GlobalValue *GV; + int64_t GVOffset = 0; + if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); + llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, + TLI.getDataLayout()); + unsigned AlignBits = KnownZero.countTrailingOnes(); + unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; + if (Align) + return MinAlign(Align, GVOffset); + } + + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. 
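+  // (Editor's illustrative note, not part of the original source: for a
+  // frame object created with 16-byte alignment and addressed at FI+4,
+  // the code below returns MinAlign(16, 4) == 4.)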
+ int FrameIdx = 1 << 31; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (isBaseWithConstantOffset(Ptr) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + // Handle FI+Cst + FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != (1 << 31)) { + const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); + unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + return FIInfoAlign; + } + + return 0; +} + +// getAddressSpace - Return the address space this GlobalAddress belongs to. +unsigned GlobalAddressSDNode::getAddressSpace() const { + return getGlobal()->getType()->getAddressSpace(); +} + + +Type *ConstantPoolSDNode::getType() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getType(); + return Val.ConstVal->getType(); +} + +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, + APInt &SplatUndef, + unsigned &SplatBitSize, + bool &HasAnyUndefs, + unsigned MinSplatBits, + bool isBigEndian) { + EVT VT = getValueType(0); + assert(VT.isVector() && "Expected a vector type"); + unsigned sz = VT.getSizeInBits(); + if (MinSplatBits > sz) + return false; + + SplatValue = APInt(sz, 0); + SplatUndef = APInt(sz, 0); + + // Get the bits. Bits with undefined values (when the corresponding element + // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared + // in SplatValue. If any of the values are not constant, give up and return + // false. + unsigned int nOps = getNumOperands(); + assert(nOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); + + for (unsigned j = 0; j < nOps; ++j) { + unsigned i = isBigEndian ? nOps-1-j : j; + SDValue OpVal = getOperand(i); + unsigned BitPos = j * EltBitSize; + + if (OpVal.getOpcode() == ISD::UNDEF) + SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); + else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) + SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize). + zextOrTrunc(sz) << BitPos; + else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) + SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; + else + return false; + } + + // The build_vector is all constants or undefs. Find the smallest element + // size that splats the vector. + + HasAnyUndefs = (SplatUndef != 0); + while (sz > 8) { + + unsigned HalfSize = sz / 2; + APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); + APInt LowValue = SplatValue.trunc(HalfSize); + APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = SplatUndef.trunc(HalfSize); + + // If the two halves do not match (ignoring undef bits), stop here. + if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || + MinSplatBits > HalfSize) + break; + + SplatValue = HighValue | LowValue; + SplatUndef = HighUndef & LowUndef; + + sz = HalfSize; + } + + SplatBitSize = sz; + return true; +} + +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { + // Find the first non-undef value in the shuffle mask. + unsigned i, e; + for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) + /* search */; + + assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); + + // Make sure all remaining elements are either undef or the same as the first + // non-undef value. 
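+  // (Editor's illustrative note, not part of the original source: the mask
+  // <3,-1,3,3> is accepted as a splat of element 3, while <3,-1,0,3> is
+  // rejected by the loop below.)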
+  for (int Idx = Mask[i]; i != e; ++i)
+    if (Mask[i] >= 0 && Mask[i] != Idx)
+      return false;
+  return true;
+}
+
+#ifdef XDEBUG
+static void checkForCyclesHelper(const SDNode *N,
+                                 SmallPtrSet<const SDNode*, 32> &Visited,
+                                 SmallPtrSet<const SDNode*, 32> &Checked) {
+  // If this node has already been checked, don't check it again.
+  if (Checked.count(N))
+    return;
+
+  // If a node has already been visited on this depth-first walk, reject it as
+  // a cycle.
+  if (!Visited.insert(N)) {
+    dbgs() << "Offending node:\n";
+    N->dumprFull();
+    errs() << "Detected cycle in SelectionDAG\n";
+    abort();
+  }
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+
+  Checked.insert(N);
+  Visited.erase(N);
+}
+#endif
+
+void llvm::checkForCycles(const llvm::SDNode *N) {
+#ifdef XDEBUG
+  assert(N && "Checking nonexistent SDNode");
+  SmallPtrSet<const SDNode*, 32> visited;
+  SmallPtrSet<const SDNode*, 32> checked;
+  checkForCyclesHelper(N, visited, checked);
+#endif
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
+  checkForCycles(DAG->getRoot().getNode());
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 0000000..ce40cd6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,6873 @@
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+                 cl::desc("Generate low-precision inline sequences "
+                          "for some float libcalls"),
+                 cl::location(LimitFloatPrecision),
+                 cl::init(0));
+
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width
+// is the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+//   %buffer = alloca [4096 x i8]
+//   %data = load [4096 x i8]* %argPtr
+//   store [4096 x i8] %data, [4096 x i8]* %buffer
+static const unsigned MaxParallelChains = 64;
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+                                      const SDValue *Parts, unsigned NumParts,
+                                      MVT PartVT, EVT ValueVT, const Value *V);
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent.
If the parts combine to a type +/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext). +static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, + const SDValue *Parts, + unsigned NumParts, MVT PartVT, EVT ValueVT, + const Value *V, + ISD::NodeType AssertOp = ISD::DELETED_NODE) { + if (ValueVT.isVector()) + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, + PartVT, ValueVT, V); + + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + if (NumParts > 1) { + // Assemble the value from multiple parts. + if (ValueVT.isInteger()) { + unsigned PartBits = PartVT.getSizeInBits(); + unsigned ValueBits = ValueVT.getSizeInBits(); + + // Assemble the power of 2 part. + unsigned RoundParts = NumParts & (NumParts - 1) ? + 1 << Log2_32(NumParts) : NumParts; + unsigned RoundBits = PartBits * RoundParts; + EVT RoundVT = RoundBits == ValueBits ? + ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); + SDValue Lo, Hi; + + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); + + if (RoundParts > 2) { + Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, + PartVT, HalfVT, V); + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, + RoundParts / 2, PartVT, HalfVT, V); + } else { + Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); + } + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); + + if (RoundParts < NumParts) { + // Assemble the trailing non-power-of-2 part. + unsigned OddParts = NumParts - RoundParts; + EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); + Hi = getCopyFromParts(DAG, DL, + Parts + RoundParts, OddParts, PartVT, OddVT, V); + + // Combine the round and odd parts. + Lo = Val; + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); + Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, + DAG.getConstant(Lo.getValueType().getSizeInBits(), + TLI.getPointerTy())); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); + } + } else if (PartVT.isFloatingPoint()) { + // FP split into multiple FP parts (for ppcf128) + assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && + "Unexpected split"); + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); + } else { + // FP split into integer parts (soft fp) + assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && + !PartVT.isVector() && "Unexpected split"); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); + } + } + + // There is now one part, held in Val. Correct it to match ValueVT. + EVT PartEVT = Val.getValueType(); + + if (PartEVT == ValueVT) + return Val; + + if (PartEVT.isInteger() && ValueVT.isInteger()) { + if (ValueVT.bitsLT(PartEVT)) { + // For a truncate, see if we have any information to + // indicate whether the truncated bits will always be + // zero or sign-extension. 
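+      // (Editor's illustrative note, not part of the original source: an i1
+      // value carried in an i8 part arrives here and becomes
+      //   (truncate (AssertZext i8:Val, i1))
+      // when the caller passed AssertOp == ISD::AssertZext.)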
+ if (AssertOp != ISD::DELETED_NODE) + Val = DAG.getNode(AssertOp, DL, PartEVT, Val, + DAG.getValueType(ValueVT)); + return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); + } + + if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + // FP_ROUND's are always exact here. + if (ValueVT.bitsLT(Val.getValueType())) + return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, + DAG.getTargetConstant(1, TLI.getPointerTy())); + + return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); + } + + if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + llvm_unreachable("Unknown mismatch!"); +} + +/// getCopyFromPartsVector - Create a value that contains the specified legal +/// parts combined into the value they represent. If the parts combine to a +/// type larger then ValueVT then AssertOp can be used to specify whether the +/// extra bits are known to be zero (ISD::AssertZext) or sign extended from +/// ValueVT (ISD::AssertSext). +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, + const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, const Value *V) { + assert(ValueVT.isVector() && "Not a vector value"); + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + // Handle a multi-element vector. + if (NumParts > 1) { + EVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + assert(RegisterVT == Parts[0].getSimpleValueType() && + "Part type doesn't match part!"); + + // Assemble the parts into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, + PartVT, IntermediateVT, V); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate + // operands from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, + PartVT, IntermediateVT, V); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the + // intermediate operands. + Val = DAG.getNode(IntermediateVT.isVector() ? + ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, + ValueVT, &Ops[0], NumIntermediates); + } + + // There is now one part, held in Val. Correct it to match ValueVT. + EVT PartEVT = Val.getValueType(); + + if (PartEVT == ValueVT) + return Val; + + if (PartEVT.isVector()) { + // If the element type of the source/dest vectors are the same, but the + // parts vector has more elements than the value vector, then we have a + // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the + // elements we want. 
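+    // (Editor's illustrative note, not part of the original source: a
+    // <2 x float> value legalized into one <4 x float> register part is
+    // recovered below as an EXTRACT_SUBVECTOR at index 0.)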
+ if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + "Cannot narrow, it would be a lossy transformation"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getIntPtrConstant(0)); + } + + // Vector/Vector bitcast. + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && + "Cannot handle this kind of promotion"); + // Promoted vector extract + bool Smaller = ValueVT.bitsLE(PartEVT); + return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT, Val); + + } + + // Trivial bitcast if the types are the same size and the destination + // vector type is legal. + if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() && + TLI.isTypeLegal(ValueVT)) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + // Handle cases such as i8 -> <1 x i1> + if (ValueVT.getVectorNumElements() != 1) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("non-trivial scalar-to-vector conversion"); + if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + report_fatal_error("Cannot handle scalar-to-vector conversion!"); + } + + if (ValueVT.getVectorNumElements() == 1 && + ValueVT.getVectorElementType() != PartEVT) { + bool Smaller = ValueVT.bitsLE(PartEVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT.getScalarType(), Val); + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); +} + +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, + SDValue Val, SDValue *Parts, unsigned NumParts, + MVT PartVT, const Value *V); + +/// getCopyToParts - Create a series of nodes that contain the specified value +/// split into legal parts. If the parts contain more bits than Val, then, for +/// integers, ExtendKind can be used to specify how to generate the extra bits. +static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + MVT PartVT, const Value *V, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + EVT ValueVT = Val.getValueType(); + + // Handle the vector case separately. + if (ValueVT.isVector()) + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned PartBits = PartVT.getSizeInBits(); + unsigned OrigNumParts = NumParts; + assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + + if (NumParts == 0) + return; + + assert(!ValueVT.isVector() && "Vector case handled elsewhere"); + EVT PartEVT = PartVT; + if (PartEVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } + + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. 
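+    // (Editor's illustrative note, not part of the original source: an i8
+    // value copied into a single i32 part takes this branch and is widened
+    // with ExtendKind, e.g. (any_extend i8 -> i32).)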
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); + } else { + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartEVT != ValueVT); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover less bits than value has, truncate the value. + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } + + // The value may have changed - recompute ValueVT. + ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); + + if (NumParts == 1) { + if (PartEVT != ValueVT) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("scalar-to-vector conversion failed"); + if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + } + + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, + DAG.getIntPtrConstant(RoundBits)); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); + + if (TLI.isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + + NumParts = RoundParts; + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. 
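+  // (Editor's illustrative note, not part of the original source: an i128
+  // value split into four i32 parts is bisected into two i64 halves, then
+  // four i32 quarters, one pair of EXTRACT_ELEMENTs per step.)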
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL, + EVT::getIntegerVT(*DAG.getContext(), + ValueVT.getSizeInBits()), + Val); + + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(1)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(0)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); + Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); + } + } + } + + if (TLI.isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); +} + + +/// getCopyToPartsVector - Create a series of nodes that contain the specified +/// value split into legal parts. +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + MVT PartVT, const Value *V) { + EVT ValueVT = Val.getValueType(); + assert(ValueVT.isVector() && "Not a vector"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (NumParts == 1) { + EVT PartEVT = PartVT; + if (PartEVT == ValueVT) { + // Nothing to do. + } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { + // Bitconvert vector->vector case. + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } else if (PartVT.isVector() && + PartEVT.getVectorElementType() == ValueVT.getVectorElementType() && + PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + EVT ElementVT = PartVT.getVectorElementType(); + // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in + // undef elements. + SmallVector<SDValue, 16> Ops; + for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + ElementVT, Val, DAG.getIntPtrConstant(i))); + + for (unsigned i = ValueVT.getVectorNumElements(), + e = PartVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ElementVT)); + + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); + + // FIXME: Use CONCAT for 2x -> 4x. + + //SDValue UndefElts = DAG.getUNDEF(VectorTy); + //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); + } else if (PartVT.isVector() && + PartEVT.getVectorElementType().bitsGE( + ValueVT.getVectorElementType()) && + PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { + + // Promoted vector extract + bool Smaller = PartEVT.bitsLE(ValueVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); + } else{ + // Vector -> scalar conversion. + assert(ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + PartVT, Val, DAG.getIntPtrConstant(0)); + + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); + } + + Parts[0] = Val; + return; + } + + // Handle a multi-element vector. 
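+  // (Editor's illustrative note, not part of the original source: on a
+  // target whose widest legal vector is <4 x i32>, an <8 x i32> value
+  // yields NumIntermediates == 2 operands of IntermediateVT <4 x i32>,
+  // carved out below with EXTRACT_SUBVECTOR.)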
+ EVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, + IntermediateVT, + NumIntermediates, RegisterVT); + unsigned NumElements = ValueVT.getVectorNumElements(); + + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + + // Split the vector into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + for (unsigned i = 0; i != NumIntermediates; ++i) { + if (IntermediateVT.isVector()) + Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, + IntermediateVT, Val, + DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); + else + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + IntermediateVT, Val, DAG.getIntPtrConstant(i)); + } + + // Split the intermediate operands into legal parts. + if (NumParts == NumIntermediates) { + // If the register was not expanded, promote or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); + } else if (NumParts > 0) { + // If the intermediate type was expanded, split each the value into + // legal parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); + } +} + +namespace { + /// RegsForValue - This struct represents the registers (physical or virtual) + /// that a particular set of values is assigned, and the type information + /// about the value. The most common situation is to represent one value at a + /// time, but struct or array values are handled element-wise as multiple + /// values. The splitting of aggregates is performed recursively, so that we + /// never have aggregate-typed registers. The values at this point do not + /// necessarily have legal types, so each value may require one or more + /// registers of some legal type. + /// + struct RegsForValue { + /// ValueVTs - The value types of the values, which may not be legal, and + /// may need be promoted or synthesized from one or more registers. + /// + SmallVector<EVT, 4> ValueVTs; + + /// RegVTs - The value types of the registers. This is the same size as + /// ValueVTs and it records, for each value, what the type of the assigned + /// register or registers are. (Individual values are never synthesized + /// from more than one type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function, however when with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector<MVT, 4> RegVTs; + + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. 
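+    /// (Editor's illustrative note, not part of the original source: an
+    /// i64 value on a 32-bit target is recorded as ValueVTs = {i64},
+    /// RegVTs = {i32}, and two virtual registers in Regs.)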
+ /// + SmallVector<unsigned, 4> Regs; + + RegsForValue() {} + + RegsForValue(const SmallVector<unsigned, 4> ®s, + MVT regvt, EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + + RegsForValue(LLVMContext &Context, const TargetLowering &tli, + unsigned Reg, Type *Ty) { + ComputeValueVTs(tli, Ty, ValueVTs); + + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); + MVT RegisterVT = tli.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } + } + + /// areValueTypesLegal - Return true if types of all the values are legal. + bool areValueTypesLegal(const TargetLowering &TLI) { + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT RegisterVT = RegVTs[Value]; + if (!TLI.isTypeLegal(RegisterVT)) + return false; + } + return true; + } + + /// append - Add the specified values to this one. + void append(const RegsForValue &RHS) { + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + } + + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVTs value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag, + const Value *V = 0) const; + + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the + /// specified value into the registers specified by this object. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag, const Value *V) const; + + /// AddInlineAsmOperands - Add this value to the specified inlineasm node + /// operand list. This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it. + void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const; + }; +} + +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, + FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag, + const Value *V) const { + // A Value with type {} or [0 x %t] needs no registers. + if (ValueVTs.empty()) + return SDValue(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Assemble the legal parts into the final values. + SmallVector<SDValue, 4> Values(ValueVTs.size()); + SmallVector<SDValue, 8> Parts; + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + // Copy the legal parts from the registers. 
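+    // (Editor's illustrative note, not part of the original source: the
+    // loop below also re-attaches known-bits facts; a 32-bit vreg known to
+    // carry a sign-extended i16 comes back as (AssertSext i32:P, i16).)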
+ EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + MVT RegisterVT = RegVTs[Value]; + + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + } else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } + + Chain = P.getValue(1); + Parts[i] = P; + + // If the source register was virtual and if we know something about it, + // add an assert node. + if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || + !RegisterVT.isInteger() || RegisterVT.isVector()) + continue; + + const FunctionLoweringInfo::LiveOutInfo *LOI = + FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); + if (!LOI) + continue; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI->NumSignBits; + unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + else + continue; + + // Add an assertion node. + assert(FromVT != MVT::Other); + Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT, V); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag, + const Value *V) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Get the list of the values's legal parts. + unsigned NumRegs = Regs.size(); + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + MVT RegisterVT = RegVTs[Value]; + ISD::NodeType ExtendKind = + TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND; + + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), + &Parts[Part], NumParts, RegisterVT, V, ExtendKind); + Part += NumParts; + } + + // Copy the parts into the registers. 
+ SmallVector<SDValue, 8> Chains(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue Part; + if (Flag == 0) { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); + } else { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); + *Flag = Part.getValue(1); + } + + Chains[i] = Part.getValue(0); + } + + if (NumRegs == 1 || Flag) + // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is + // flagged to it. That is the CopyToReg nodes and the user are considered + // a single scheduling unit. If we create a TokenFactor and return it as + // chain, then the TokenFactor is both a predecessor (operand) of the + // user as well as a successor (the TF operands are flagged to the user). + // c1, f1 = CopyToReg + // c2, f2 = CopyToReg + // c3 = TokenFactor c1, c2 + // ... + // = op c3, ..., f2 + Chain = Chains[NumRegs-1]; + else + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); +} + +/// AddInlineAsmOperands - Add this value to the specified inlineasm node +/// operand list. This adds the code marker and includes the number of +/// values added into it. +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); + if (HasMatching) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); + else if (!Regs.empty() && + TargetRegisterInfo::isVirtualRegister(Regs.front())) { + // Put the register class of the virtual registers in the flag word. That + // way, later passes can recompute register class constraints for inline + // assembly as well as normal instructions. + // Don't do this for tied operands that can use the regclass information + // from the def. + const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); + Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + } + + SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + Ops.push_back(Res); + + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + MVT RegisterVT = RegVTs[Value]; + for (unsigned i = 0; i != NumRegs; ++i) { + assert(Reg < Regs.size() && "Mismatch in # registers expected"); + Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + } + } +} + +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li) { + AA = &aa; + GFI = gfi; + LibInfo = li; + TD = DAG.getTarget().getDataLayout(); + Context = DAG.getContext(); + LPadToCallSiteMap.clear(); +} + +/// clear - Clear out the current SelectionDAG and the associated +/// state and prepare this SelectionDAGBuilder object to be used +/// for a new block. This doesn't clear out information about +/// additional blocks that are needed to complete switch lowering +/// or PHI node updating; that information is cleared out as it is +/// consumed. +void SelectionDAGBuilder::clear() { + NodeMap.clear(); + UnusedArgNodeMap.clear(); + PendingLoads.clear(); + PendingExports.clear(); + CurDebugLoc = DebugLoc(); + HasTailCall = false; +} + +/// clearDanglingDebugInfo - Clear the dangling debug information +/// map. 
This function is separated from the clear so that debug +/// information that is dangling in a basic block can be properly +/// resolved in a different basic block. This allows the +/// SelectionDAG to resolve dangling debug information attached +/// to PHI nodes. +void SelectionDAGBuilder::clearDanglingDebugInfo() { + DanglingDebugInfoMap.clear(); +} + +/// getRoot - Return the current virtual root of the Selection DAG, +/// flushing any PendingLoad items. This must be done before emitting +/// a store or any other node that may need to be ordered after any +/// prior load instructions. +/// +SDValue SelectionDAGBuilder::getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); + + if (PendingLoads.size() == 1) { + SDValue Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } + + // Otherwise, we have to make a token factor node. + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingLoads[0], PendingLoads.size()); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; +} + +/// getControlRoot - Similar to getRoot, but instead of flushing all the +/// PendingLoad items, flush all the PendingExports items. It is necessary +/// to do this before emitting a terminator instruction. +/// +SDValue SelectionDAGBuilder::getControlRoot() { + SDValue Root = DAG.getRoot(); + + if (PendingExports.empty()) + return Root; + + // Turn all of the CopyToReg chains into one factored node. + if (Root.getOpcode() != ISD::EntryToken) { + unsigned i = 0, e = PendingExports.size(); + for (; i != e; ++i) { + assert(PendingExports[i].getNode()->getNumOperands() > 1); + if (PendingExports[i].getNode()->getOperand(0) == Root) + break; // Don't add the root if we already indirectly depend on it. + } + + if (i == e) + PendingExports.push_back(Root); + } + + Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingExports[0], + PendingExports.size()); + PendingExports.clear(); + DAG.setRoot(Root); + return Root; +} + +void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { + if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. + DAG.AssignOrdering(Node, SDNodeOrder); + + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) + AssignOrderingToNode(Node->getOperand(I).getNode()); +} + +void SelectionDAGBuilder::visit(const Instruction &I) { + // Set up outgoing PHI node register values before emitting the terminator. + if (isa<TerminatorInst>(&I)) + HandlePHINodesInSuccessorBlocks(I.getParent()); + + CurDebugLoc = I.getDebugLoc(); + + visit(I.getOpcode(), I); + + if (!isa<TerminatorInst>(&I) && !HasTailCall) + CopyToExportRegsIfNeeded(&I); + + CurDebugLoc = DebugLoc(); +} + +void SelectionDAGBuilder::visitPHI(const PHINode &) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); +} + +void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. + switch (Opcode) { + default: llvm_unreachable("Unknown instruction type encountered!"); + // Build the switch statement using the Instruction.def file. +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; +#include "llvm/IR/Instruction.def" + } + + // Assign the ordering to the freshly created DAG nodes. 
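+  // (Editor's illustrative note, not part of the original source:
+  // SDNodeOrder is incremented once per IR instruction that produced a
+  // node, and AssignOrderingToNode stamps that order onto the whole
+  // operand subtree so later passes can recover source order.)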
+ if (NodeMap.count(&I)) { + ++SDNodeOrder; + AssignOrderingToNode(getValue(&I).getNode()); + } +} + +// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, +// generate the debug data structures now that we've seen its definition. +void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, + SDValue Val) { + DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; + if (DDI.getDI()) { + const DbgValueInst *DI = DDI.getDI(); + DebugLoc dl = DDI.getdl(); + unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); + MDNode *Variable = DI->getVariable(); + uint64_t Offset = DI->getOffset(); + SDDbgValue *SDV; + if (Val.getNode()) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { + SDV = DAG.getDbgValue(Variable, Val.getNode(), + Val.getResNo(), Offset, dl, DbgSDNodeOrder); + DAG.AddDbgValue(SDV, Val.getNode(), false); + } + } else + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DanglingDebugInfoMap[V] = DanglingDebugInfo(); + } +} + +/// getValue - Return an SDValue for the given Value. +SDValue SelectionDAGBuilder::getValue(const Value *V) { + // If we already have an SDValue for this value, use it. It's important + // to do this first, so that we don't create a CopyFromReg if we already + // have a regular SDValue. + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + // If there's a virtual register allocated and initialized for this + // value, use it. + DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); + resolveDanglingDebugInfo(V, N); + return N; + } + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); + return Val; +} + +/// getNonRegisterValue - Return an SDValue for the given Value, but +/// don't look in FuncInfo.ValueMap for a virtual register. +SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { + // If we already have an SDValue for this value, use it. + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); + return Val; +} + +/// getValueImpl - Helper function for getValue and getNonRegisterValue. +/// Create an SDValue for the given value. 
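+/// (Editor's illustrative note, not part of the original source: a
+/// ConstantInt becomes DAG.getConstant, a GlobalValue becomes
+/// DAG.getGlobalAddress, and a static alloca the matching FrameIndex
+/// node, as the cases below show.)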
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { + if (const Constant *C = dyn_cast<Constant>(V)) { + EVT VT = TLI.getValueType(V->getType(), true); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) + return DAG.getConstant(*CI, VT); + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT); + + if (isa<ConstantPointerNull>(C)) + return DAG.getConstant(0, TLI.getPointerTy()); + + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + return DAG.getConstantFP(*CFP, VT); + + if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) + return DAG.getUNDEF(VT); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + visit(CE->getOpcode(), *CE); + SDValue N1 = NodeMap[V]; + assert(N1.getNode() && "visit didn't populate the NodeMap!"); + return N1; + } + + if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { + SmallVector<SDValue, 4> Constants; + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) { + SDNode *Val = getValue(*OI).getNode(); + // If the operand is an empty aggregate, there are no values. + if (!Val) continue; + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Constants.push_back(SDValue(Val, i)); + } + + return DAG.getMergeValues(&Constants[0], Constants.size(), + getCurDebugLoc()); + } + + if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(C)) { + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Ops.push_back(SDValue(Val, i)); + } + + if (isa<ArrayType>(CDS->getType())) + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } + + if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { + assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && + "Unknown struct or array constant!"); + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, C->getType(), ValueVTs); + unsigned NumElts = ValueVTs.size(); + if (NumElts == 0) + return SDValue(); // empty struct + SmallVector<SDValue, 4> Constants(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + EVT EltVT = ValueVTs[i]; + if (isa<UndefValue>(C)) + Constants[i] = DAG.getUNDEF(EltVT); + else if (EltVT.isFloatingPoint()) + Constants[i] = DAG.getConstantFP(0, EltVT); + else + Constants[i] = DAG.getConstant(0, EltVT); + } + + return DAG.getMergeValues(&Constants[0], NumElts, + getCurDebugLoc()); + } + + if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) + return DAG.getBlockAddress(BA, VT); + + VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned NumElements = VecTy->getNumElements(); + + // Now that we know the number and type of the elements, get that number of + // elements into the Ops array based on what kind of constant it is. 
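+  // E.g. <4 x i32> <i32 1, i32 2, i32 3, i32 4> contributes four constant
+  // operands, while a zeroinitializer is represented by a single zero
+  // splatted NumElements times.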
+ SmallVector<SDValue, 16> Ops; + if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { + for (unsigned i = 0; i != NumElements; ++i) + Ops.push_back(getValue(CV->getOperand(i))); + } else { + assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); + EVT EltVT = TLI.getValueType(VecTy->getElementType()); + + SDValue Op; + if (EltVT.isFloatingPoint()) + Op = DAG.getConstantFP(0, EltVT); + else + Op = DAG.getConstant(0, EltVT); + Ops.assign(NumElements, Op); + } + + // Create a BUILD_VECTOR node. + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } + + // If this is a static alloca, generate it as the frameindex instead of + // computation. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + } + + // If this is an instruction which fast-isel has deferred, select it now. + if (const Instruction *Inst = dyn_cast<Instruction>(V)) { + unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); + } + + llvm_unreachable("Can't get register for value!"); +} + +void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + SDValue Chain = getControlRoot(); + SmallVector<ISD::OutputArg, 8> Outs; + SmallVector<SDValue, 8> OutVals; + + if (!FuncInfo.CanLowerReturn) { + unsigned DemoteReg = FuncInfo.DemoteRegister; + const Function *F = I.getParent()->getParent(); + + // Emit a store of the return value through the virtual register. + // Leave Outs empty so that LowerReturn won't try to load return + // registers the usual way. + SmallVector<EVT, 1> PtrValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + PtrValueVTs); + + SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); + SDValue RetOp = getValue(I.getOperand(0)); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + + SmallVector<SDValue, 4> Chains(NumValues); + for (unsigned i = 0; i != NumValues; ++i) { + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), + RetPtr.getValueType(), RetPtr, + DAG.getIntPtrConstant(Offsets[i])); + Chains[i] = + DAG.getStore(Chain, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + i), + // FIXME: better loc info would be nice. 
+ Add, MachinePointerInfo(), false, false, 0); + } + + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + } else if (I.getNumOperands() != 0) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues) { + SDValue RetOp = getValue(I.getOperand(0)); + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + const Function *F = I.getParent()->getParent(); + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) + VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); + + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); + MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + SmallVector<SDValue, 4> Parts(NumParts); + getCopyToParts(DAG, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, &I, ExtendKind); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::InReg)) + Flags.setInReg(); + + // Propagate extension type if any + if (ExtendKind == ISD::SIGN_EXTEND) + Flags.setSExt(); + else if (ExtendKind == ISD::ZERO_EXTEND) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + /*isfixed=*/true, 0, 0)); + OutVals.push_back(Parts[i]); + } + } + } + } + + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CallingConv::ID CallConv = + DAG.getMachineFunction().getFunction()->getCallingConv(); + Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, + Outs, OutVals, getCurDebugLoc(), DAG); + + // Verify that the target's LowerReturn behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerReturn didn't return a valid chain!"); + + // Update the DAG with the new chain value resulting from return lowering. + DAG.setRoot(Chain); +} + +/// CopyToExportRegsIfNeeded - If the given value has virtual registers +/// created for it, emit nodes to copy the value into the virtual +/// registers. +void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + // Skip empty types + if (V->getType()->isEmptyTy()) + return; + + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + assert(!V->use_empty() && "Unused value assigned virtual registers!"); + CopyValueToVirtualRegister(V, VMI->second); + } +} + +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { + // No need to export constants. + if (!isa<Instruction>(V) && !isa<Argument>(V)) return; + + // Already exported? + if (FuncInfo.isExportedInst(V)) return; + + unsigned Reg = FuncInfo.InitializeRegForValue(V); + CopyValueToVirtualRegister(V, Reg); +} + +bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. 
We don't know + // how to export them from some other block. + if (const Instruction *VI = dyn_cast<Instruction>(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; + + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } + + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa<Argument>(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; + + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } + + // Otherwise, constants can always be exported. + return true; +} + +/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. +uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (!BPI) + return 0; + const BasicBlock *SrcBB = Src->getBasicBlock(); + const BasicBlock *DstBB = Dst->getBasicBlock(); + return BPI->getEdgeWeight(SrcBB, DstBB); +} + +void SelectionDAGBuilder:: +addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, + uint32_t Weight /* = 0 */) { + if (!Weight) + Weight = getEdgeWeight(Src, Dst); + Src->addSuccessor(Dst, Weight); +} + + +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == BB; + return true; +} + +/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. +/// This function emits a branch and is used at the leaves of an OR or an +/// AND operator tree. +/// +void +SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB) { + const BasicBlock *BB = CurBB->getBasicBlock(); + + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + // The operands of the cmp have to be in this block. We don't know + // how to export them from some other block. If this is the first block + // of the sequence, no exporting is needed. + if (CurBB == SwitchBB || + (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && + isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { + ISD::CondCode Condition; + if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + Condition = getICmpCondCode(IC->getPredicate()); + } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { + Condition = getFCmpCondCode(FC->getPredicate()); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); + } else { + Condition = ISD::SETEQ; // silence warning. + llvm_unreachable("Unknown compare instruction"); + } + + CaseBlock CB(Condition, BOp->getOperand(0), + BOp->getOperand(1), NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), + NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); +} + +/// FindMergedConditions - If Cond is an expression like +void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + unsigned Opc) { + // If this node is not part of the or/and tree, emit it as a branch. 
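+  // Only a single-use and/or in the current block, with both operands also
+  // available in this block, can be folded into the branch tree; e.g. the
+  // inner "a && b" of "if (a && b && c)" qualifies, while a condition with
+  // other uses must be emitted as an explicit setcc.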
+ const Instruction *BOp = dyn_cast<Instruction>(Cond); + if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || + (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOp->getParent() != CurBB->getBasicBlock() || + !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || + !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); + return; + } + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI = CurBB; + MachineFunction &MF = DAG.getMachineFunction(); + MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); + + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); + + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + } +} + +/// If the set of cases should be emitted as a series of branches, return true. +/// If we should emit this as a bunch of and/or'd together conditions, return +/// false. +bool +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ + if (Cases.size() != 2) return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. + if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + // Handle: (X != null) | (Y != null) --> (X|Y) != 0 + // Handle: (X == null) & (Y == null) --> (X|Y) == 0 + if (Cases[0].CmpRHS == Cases[1].CmpRHS && + Cases[0].CC == Cases[1].CC && + isa<Constant>(Cases[0].CmpRHS) && + cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { + if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) + return false; + if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) + return false; + } + + return true; +} + +void SelectionDAGBuilder::visitBr(const BranchInst &I) { + MachineBasicBlock *BrMBB = FuncInfo.MBB; + + // Update machine-CFG edges. + MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = BrMBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + if (I.isUnconditional()) { + // Update machine-CFG edges. + BrMBB->addSuccessor(Succ0MBB); + + // If this is not a fall-through branch, emit the branch. + if (Succ0MBB != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Succ0MBB))); + + return; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. 
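+  // (A typical special case: a condition like "x == 0 || y == 7", which is
+  // lowered as a pair of conditional branches instead of materializing an
+  // i1 value and branching on it once.)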
+ const Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // As long as jumps are not expensive, this should improve performance. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + // + if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { + if (!TLI.isJumpExpensive() && + BOp->hasOneUse() && + (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, + BOp->getOpcode()); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SwitchCases)) { + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + } + + // Emit the branch for this block. + visitSwitchCase(SwitchCases[0], BrMBB); + SwitchCases.erase(SwitchCases.begin()); + return; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) + FuncInfo.MF->erase(SwitchCases[i].ThisBB); + + SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), + NULL, Succ0MBB, Succ1MBB, BrMBB); + + // Use visitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + visitSwitchCase(CB, BrMBB); +} + +/// visitSwitchCase - Emits the necessary code to represent a single node in +/// the binary search tree resulting from lowering a switch instruction. +void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB) { + SDValue Cond; + SDValue CondLHS = getValue(CB.CmpLHS); + DebugLoc dl = getCurDebugLoc(); + + // Build the setcc now. + if (CB.CmpMHS == NULL) { + // Fold "(X == true)" to X and "(X == false)" to !X to + // handle common cases produced by branch lowering. 
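+  // E.g. a branch on "(X == false)" is emitted below as a branch on
+  // "X xor 1", which later DAG combines can fold into an inverted branch.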
+    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+        CB.CC == ISD::SETEQ)
+      Cond = CondLHS;
+    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+             CB.CC == ISD::SETEQ) {
+      SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+    } else
+      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+  } else {
+    assert(CB.CC == ISD::SETCC_INVALID &&
+           "Condition is undefined for range membership check.");
+
+    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+    SDValue CmpOp = getValue(CB.CmpMHS);
+    EVT VT = CmpOp.getValueType();
+
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
+      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+                          ISD::SETULE);
+    } else {
+      SDValue SUB = DAG.getNode(ISD::SUB, dl,
+                                VT, CmpOp, DAG.getConstant(Low, VT));
+      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+                          DAG.getConstant(High-Low, VT), ISD::SETULE);
+    }
+  }
+
+  // Update successor info
+  addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+  // TrueBB and FalseBB are always different unless the incoming IR is
+  // degenerate. This only happens when running llc on weird IR.
+  if (CB.TrueBB != CB.FalseBB)
+    addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = SwitchBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  // If the lhs block is the next block, invert the condition so that we can
+  // fall through to the lhs instead of the rhs block.
+  if (CB.TrueBB == NextBlock) {
+    std::swap(CB.TrueBB, CB.FalseBB);
+    SDValue True = DAG.getConstant(1, Cond.getValueType());
+    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+  }
+
+  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+                               MVT::Other, getControlRoot(), Cond,
+                               DAG.getBasicBlock(CB.TrueBB));
+
+  // Insert the false branch. Do this even if it's a fall through branch,
+  // this makes it easier to do DAG optimizations which require inverting
+  // the branch condition.
+  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                       DAG.getBasicBlock(CB.FalseBB));
+
+  DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit the JumpTable node in the current MBB.
+void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+  // Emit the code for the jump table
+  assert(JT.Reg != -1U && "Should lower JT Header first!");
+  EVT PTy = TLI.getPointerTy();
+  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+                                     JT.Reg, PTy);
+  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+                                    MVT::Other, Index.getValue(1),
+                                    Table, Index);
+  DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits the code needed to produce an
+/// index into the JumpTable from the value being switched on.
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+                                               JumpTableHeader &JTH,
+                                               MachineBasicBlock *SwitchBB) {
+  // Subtract the lowest switch case value from the value being switched on and
+  // conditionally branch to the default mbb if the result is greater than the
+  // difference between the smallest and largest cases.
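+  // E.g. for cases 10..25 this emits "Sub = x - 10" and branches to the
+  // default block when "Sub > 15" (unsigned); values below 10 wrap around
+  // and fail the same check, and Sub doubles as the table index.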
+  SDValue SwitchOp = getValue(JTH.SValue);
+  EVT VT = SwitchOp.getValueType();
+  SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+                            DAG.getConstant(JTH.First, VT));
+
+  // The SDNode we just created, which holds the value being switched on minus
+  // the smallest case value, needs to be copied to a virtual register so it
+  // can be used as an index into the jump table in a subsequent basic block.
+  // This value may be smaller or larger than the target's pointer type, and
+  // therefore may require extension or truncation.
+  SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
+
+  unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+                                    JumpTableReg, SwitchOp);
+  JT.Reg = JumpTableReg;
+
+  // Emit the range check for the jump table, and branch to the default block
+  // for the switch statement if the value being switched on exceeds the largest
+  // case in the switch.
+  SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+                             TLI.getSetCCResultType(Sub.getValueType()), Sub,
+                             DAG.getConstant(JTH.Last-JTH.First,VT),
+                             ISD::SETUGT);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = SwitchBB;
+
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                               MVT::Other, CopyTo, CMP,
+                               DAG.getBasicBlock(JT.Default));
+
+  if (JT.MBB != NextBlock)
+    BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+                         DAG.getBasicBlock(JT.MBB));
+
+  DAG.setRoot(BrCond);
+}
+
+/// visitBitTestHeader - This function emits the code needed to produce a
+/// value suitable for "bit tests".
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+                                             MachineBasicBlock *SwitchBB) {
+  // Subtract the minimum value
+  SDValue SwitchOp = getValue(B.SValue);
+  EVT VT = SwitchOp.getValueType();
+  SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+                            DAG.getConstant(B.First, VT));
+
+  // Check range
+  SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+                                  TLI.getSetCCResultType(Sub.getValueType()),
+                                  Sub, DAG.getConstant(B.Range, VT),
+                                  ISD::SETUGT);
+
+  // Determine the type of the test operands.
+  bool UsePtrType = false;
+  if (!TLI.isTypeLegal(VT))
+    UsePtrType = true;
+  else {
+    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+      if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
+        // Switch case ranges are encoded into a series of masks.
+        // Just use the pointer type; it's guaranteed to fit.
+        UsePtrType = true;
+        break;
+      }
+  }
+  if (UsePtrType) {
+    VT = TLI.getPointerTy();
+    Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+  }
+
+  B.RegVT = VT.getSimpleVT();
+  B.Reg = FuncInfo.CreateReg(B.RegVT);
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+                                    B.Reg, Sub);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+ MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + MachineBasicBlock* MBB = B.Cases[0].ThisBB; + + addSuccessorWithWeight(SwitchBB, B.Default); + addSuccessorWithWeight(SwitchBB, MBB); + + SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, RangeCmp, + DAG.getBasicBlock(B.Default)); + + if (MBB != NextBlock) + BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, + DAG.getBasicBlock(MBB)); + + DAG.setRoot(BrRange); +} + +/// visitBitTestCase - this function produces one "bit test" +void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, + unsigned Reg, + BitTestCase &B, + MachineBasicBlock *SwitchBB) { + MVT VT = BB.RegVT; + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + Reg, VT); + SDValue Cmp; + unsigned PopCount = CountPopulation_64(B.Mask); + if (PopCount == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(VT), + ShiftOp, + DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), + ISD::SETEQ); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(VT), + ShiftOp, + DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), + ISD::SETNE); + } else { + // Make desired shift + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, + DAG.getConstant(1, VT), ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + VT, SwitchVal, DAG.getConstant(B.Mask, VT)); + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(VT), + AndOp, DAG.getConstant(0, VT), + ISD::SETNE); + } + + // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. + addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); + // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. + addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); + + SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + Cmp, DAG.getBasicBlock(B.TargetBB)); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + if (NextMBB != NextBlock) + BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, + DAG.getBasicBlock(NextMBB)); + + DAG.setRoot(BrAnd); +} + +void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { + MachineBasicBlock *InvokeMBB = FuncInfo.MBB; + + // Retrieve successors. + MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; + MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + + const Value *Callee(I.getCalledValue()); + const Function *Fn = dyn_cast<Function>(Callee); + if (isa<InlineAsm>(Callee)) + visitInlineAsm(&I); + else if (Fn && Fn->isIntrinsic()) { + assert(Fn->getIntrinsicID() == Intrinsic::donothing); + // Ignore invokes to @llvm.donothing: jump directly to the next BB. + } else + LowerCallTo(&I, getValue(Callee), false, LandingPad); + + // If the value of the invoke is used outside of its defining block, make it + // available as a virtual register. 
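+  // E.g. for "%r = invoke i32 @f() to label %ok unwind label %lp", a use of
+  // %r in %ok requires %r to be copied into a virtual register so it
+  // survives the CFG edge out of this block.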
+  CopyToExportRegsIfNeeded(&I);
+
+  // Update successor info
+  addSuccessorWithWeight(InvokeMBB, Return);
+  addSuccessorWithWeight(InvokeMBB, LandingPad);
+
+  // Drop into normal successor.
+  DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                          MVT::Other, getControlRoot(),
+                          DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+  assert(FuncInfo.MBB->isLandingPad() &&
+         "Call to landingpad not in landing pad!");
+
+  MachineBasicBlock *MBB = FuncInfo.MBB;
+  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+  AddLandingPadInfo(LP, MMI, MBB);
+
+  // If there aren't registers to copy the values into (e.g., during SjLj
+  // exceptions), then don't bother to create these DAG nodes.
+  if (TLI.getExceptionPointerRegister() == 0 &&
+      TLI.getExceptionSelectorRegister() == 0)
+    return;
+
+  SmallVector<EVT, 2> ValueVTs;
+  ComputeValueVTs(TLI, LP.getType(), ValueVTs);
+
+  // Insert the EXCEPTIONADDR instruction.
+  assert(FuncInfo.MBB->isLandingPad() &&
+         "Call to eh.exception not in landing pad!");
+  SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+  SDValue Ops[2];
+  Ops[0] = DAG.getRoot();
+  SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1);
+  SDValue Chain = Op1.getValue(1);
+
+  // Insert the EHSELECTION instruction.
+  VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+  Ops[0] = Op1;
+  Ops[1] = Chain;
+  SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2);
+  Chain = Op2.getValue(1);
+  Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32);
+
+  Ops[0] = Op1;
+  Ops[1] = Op2;
+  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                            DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+                            &Ops[0], 2);
+
+  std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain);
+  setValue(&LP, RetPair.first);
+  DAG.setRoot(RetPair.second);
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).
+bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
+                                                 CaseRecVector& WorkList,
+                                                 const Value* SV,
+                                                 MachineBasicBlock *Default,
+                                                 MachineBasicBlock *SwitchBB) {
+  // Size is the number of Cases represented by this range.
+  size_t Size = CR.Range.second - CR.Range.first;
+  if (Size > 3)
+    return false;
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  // If any two of the cases have the same destination, and if one value
+  // is the same as the other, but has one bit unset that the other has set,
+  // use bit manipulation to do two compares at once. For example:
+  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+  // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+  // TODO: Handle cases where CR.CaseBB != SwitchBB.
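+  // The trick applies whenever the two case values differ in exactly one
+  // bit: or'ing that bit into X maps both 4 and 6 to 6, so a single SETEQ
+  // suffices. CommonBit below is computed as BigValue & ~SmallValue.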
+ if (Size == 2 && CR.CaseBB == SwitchBB) { + Case &Small = *CR.Range.first; + Case &Big = *(CR.Range.second-1); + + if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { + const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); + const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); + + // Check that there is only one bit different. + if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && + (SmallValue | BigValue) == BigValue) { + // Isolate the common bit. + APInt CommonBit = BigValue & ~SmallValue; + assert((SmallValue | CommonBit) == BigValue && + CommonBit.countPopulation() == 1 && "Not a common bit?"); + + SDValue CondLHS = getValue(SV); + EVT VT = CondLHS.getValueType(); + DebugLoc DL = getCurDebugLoc(); + + SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, + DAG.getConstant(CommonBit, VT)); + SDValue Cond = DAG.getSetCC(DL, MVT::i1, + Or, DAG.getConstant(BigValue, VT), + ISD::SETEQ); + + // Update successor info. + // Both Small and Big will jump to Small.BB, so we sum up the weights. + addSuccessorWithWeight(SwitchBB, Small.BB, + Small.ExtraWeight + Big.ExtraWeight); + addSuccessorWithWeight(SwitchBB, Default, + // The default destination is the first successor in IR. + BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); + + // Insert the true branch. + SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, + getControlRoot(), Cond, + DAG.getBasicBlock(Small.BB)); + + // Insert the false branch. + BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, + DAG.getBasicBlock(Default)); + + DAG.setRoot(BrCond); + return true; + } + } + } + + // Order cases by weight so the most likely case will be checked first. + uint32_t UnhandledWeights = 0; + if (BPI) { + for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { + uint32_t IWeight = I->ExtraWeight; + UnhandledWeights += IWeight; + for (CaseItr J = CR.Range.first; J < I; ++J) { + uint32_t JWeight = J->ExtraWeight; + if (IWeight > JWeight) + std::swap(*I, *J); + } + } + } + // Rearrange the case blocks so that the last one falls through if possible. + Case &BackCase = *(CR.Range.second-1); + if (Size > 1 && + NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { + // The last case block won't fall through into 'NextBlock' if we emit the + // branches in this order. See if rearranging a case value would help. + // We start at the bottom as it's the case with the least weight. + for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){ + if (I->BB == NextBlock) { + std::swap(*I, BackCase); + break; + } + } + } + + // Create a CaseBlock record representing a conditional branch to + // the Case's target mbb if the value being switched on SV is equal + // to C. + MachineBasicBlock *CurBlock = CR.CaseBB; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + MachineBasicBlock *FallThrough; + if (I != E-1) { + FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); + CurMF->insert(BBI, FallThrough); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } else { + // If the last case doesn't match, go to the default block. 
+      FallThrough = Default;
+    }
+
+    const Value *RHS, *LHS, *MHS;
+    ISD::CondCode CC;
+    if (I->High == I->Low) {
+      // This is just a small case range containing exactly 1 case.
+      CC = ISD::SETEQ;
+      LHS = SV; RHS = I->High; MHS = NULL;
+    } else {
+      CC = ISD::SETCC_INVALID;
+      LHS = I->Low; MHS = SV; RHS = I->High;
+    }
+
+    // The false weight is the sum of all unhandled cases.
+    UnhandledWeights -= I->ExtraWeight;
+    CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
+                 /* me */ CurBlock,
+                 /* trueweight */ I->ExtraWeight,
+                 /* falseweight */ UnhandledWeights);
+
+    // If emitting the first comparison, just call visitSwitchCase to emit the
+    // code into the current block. Otherwise, push the CaseBlock onto the
+    // vector to be later processed by SDISel, and insert the node's MBB
+    // before the next MBB.
+    if (CurBlock == SwitchBB)
+      visitSwitchCase(CB, SwitchBB);
+    else
+      SwitchCases.push_back(CB);
+
+    CurBlock = FallThrough;
+  }
+
+  return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+  return TLI.supportJumpTables() &&
+          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+  APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
+  return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
+                                             CaseRecVector &WorkList,
+                                             const Value *SV,
+                                             MachineBasicBlock *Default,
+                                             MachineBasicBlock *SwitchBB) {
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase = *(CR.Range.second-1);
+
+  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+
+  APInt TSize(First.getBitWidth(), 0);
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
+    TSize += I->size();
+
+  if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
+    return false;
+
+  APInt Range = ComputeRange(First, Last);
+  // The density is TSize / Range. Require at least 40%.
+  // It should not be possible for IntTSize to saturate for sane code, but make
+  // sure we handle Range saturation correctly.
+  uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
+  uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
+  if (IntTSize * 10 < IntRange * 4)
+    return false;
+
+  DEBUG(dbgs() << "Lowering jump table\n"
+               << "First entry: " << First << ". Last entry: " << Last << '\n'
+               << "Range: " << Range << ". Size: " << TSize << ".\n\n");
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Create a new basic block to hold the code for loading the address
+  // of the jump table, and jumping to it. Update successor information;
+  // we will either branch to the default case for the switch, or the jump
+  // table.
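+  // CR.CaseBB thus becomes the jump table header: it gets two successors,
+  // Default (taken when the range check fails) and JumpTableBB (the indexed
+  // dispatch), wired up just below.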
+ MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, JumpTableBB); + + addSuccessorWithWeight(CR.CaseBB, Default); + addSuccessorWithWeight(CR.CaseBB, JumpTableBB); + + // Build a vector of destination BBs, corresponding to each target + // of the jump table. If the value of the jump table slot corresponds to + // a case statement, push the case's BB onto the vector, otherwise, push + // the default BB. + std::vector<MachineBasicBlock*> DestBBs; + APInt TEI = First; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { + const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); + const APInt &High = cast<ConstantInt>(I->High)->getValue(); + + if (Low.ule(TEI) && TEI.ule(High)) { + DestBBs.push_back(I->BB); + if (TEI==High) + ++I; + } else { + DestBBs.push_back(Default); + } + } + + // Calculate weight for each unique destination in CR. + DenseMap<MachineBasicBlock*, uint32_t> DestWeights; + if (FuncInfo.BPI) + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = + DestWeights.find(I->BB); + if (Itr != DestWeights.end()) + Itr->second += I->ExtraWeight; + else + DestWeights[I->BB] = I->ExtraWeight; + } + + // Update successor info. Add one edge to each unique successor. + BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); + for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), + E = DestBBs.end(); I != E; ++I) { + if (!SuccsHandled[(*I)->getNumber()]) { + SuccsHandled[(*I)->getNumber()] = true; + DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = + DestWeights.find(*I); + addSuccessorWithWeight(JumpTableBB, *I, + Itr != DestWeights.end() ? Itr->second : 0); + } + } + + // Create a jump table index for this jump table. + unsigned JTEncoding = TLI.getJumpTableEncoding(); + unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) + ->createJumpTableIndex(DestBBs); + + // Set the jump table information so that we can codegen it as a second + // MachineBasicBlock + JumpTable JT(-1U, JTI, JumpTableBB, Default); + JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); + if (CR.CaseBB == SwitchBB) + visitJumpTableHeader(JT, JTH, SwitchBB); + + JTCases.push_back(JumpTableBlock(JTH, JT)); + return true; +} + +/// handleBTSplitSwitchCase - emit comparison and split binary search tree into +/// 2 subtrees. +bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock *Default, + MachineBasicBlock *SwitchBB) { + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = FuncInfo.MF; + + // Figure out which block is immediately after the current one. + MachineFunction::iterator BBI = CR.CaseBB; + ++BBI; + + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + // Size is the number of Cases represented by this range. + unsigned Size = CR.Range.second - CR.Range.first; + + const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); + double FMetric = 0; + CaseItr Pivot = CR.Range.first + Size/2; + + // Select optimal pivot, maximizing sum density of LHS and RHS. This will + // (heuristically) allow us to emit JumpTable's later. 
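+  // For each candidate split point the metric computed below is roughly
+  //   log2(gap between adjacent clusters) * (LHS density + RHS density),
+  // so a wide gap separating two dense halves makes the best pivot.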
+ APInt TSize(First.getBitWidth(), 0); + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) + TSize += I->size(); + + APInt LSize = FrontCase.size(); + APInt RSize = TSize-LSize; + DEBUG(dbgs() << "Selecting best pivot: \n" + << "First: " << First << ", Last: " << Last <<'\n' + << "LSize: " << LSize << ", RSize: " << RSize << '\n'); + for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; + J!=E; ++I, ++J) { + const APInt &LEnd = cast<ConstantInt>(I->High)->getValue(); + const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue(); + APInt Range = ComputeRange(LEnd, RBegin); + assert((Range - 2ULL).isNonNegative() && + "Invalid case distance"); + // Use volatile double here to avoid excess precision issues on some hosts, + // e.g. that use 80-bit X87 registers. + volatile double LDensity = + (double)LSize.roundToDouble() / + (LEnd - First + 1ULL).roundToDouble(); + volatile double RDensity = + (double)RSize.roundToDouble() / + (Last - RBegin + 1ULL).roundToDouble(); + double Metric = Range.logBase2()*(LDensity+RDensity); + // Should always split in some non-trivial place + DEBUG(dbgs() <<"=>Step\n" + << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' + << "LDensity: " << LDensity + << ", RDensity: " << RDensity << '\n' + << "Metric: " << Metric << '\n'); + if (FMetric < Metric) { + Pivot = J; + FMetric = Metric; + DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); + } + + LSize += J->size(); + RSize -= J->size(); + } + if (areJTsAllowed(TLI)) { + // If our case is dense we *really* should handle it earlier! + assert((FMetric > 0) && "Should handle dense range earlier!"); + } else { + Pivot = CR.Range.first + Size/2; + } + + CaseRange LHSR(CR.Range.first, Pivot); + CaseRange RHSR(Pivot, CR.Range.second); + const Constant *C = Pivot->Low; + MachineBasicBlock *FalseBB = 0, *TrueBB = 0; + + // We know that we branch to the LHS if the Value being switched on is + // less than the Pivot value, C. We use this to optimize our binary + // tree a bit, by recognizing that if SV is greater than or equal to the + // LHS's Case Value, and that Case Value is exactly one less than the + // Pivot's Value, then we can branch directly to the LHS's Target, + // rather than creating a leaf node for it. + if ((LHSR.second - LHSR.first) == 1 && + LHSR.first->High == CR.GE && + cast<ConstantInt>(C)->getValue() == + (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { + TrueBB = LHSR.first->BB; + } else { + TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, TrueBB); + WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } + + // Similar to the optimization above, if the Value being switched on is + // known to be less than the Constant CR.LT, and the current Case Value + // is CR.LT - 1, then we can branch directly to the target block for + // the current Case Value, rather than emitting a RHS leaf node for it. + if ((RHSR.second - RHSR.first) == 1 && CR.LT && + cast<ConstantInt>(RHSR.first->Low)->getValue() == + (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { + FalseBB = RHSR.first->BB; + } else { + FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, FalseBB); + WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); + + // Put SV in a virtual register to make it available from the new blocks. 
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the LHS node if the value being switched on SV is less than C.
+  // Otherwise, branch to the RHS node.
+  CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+  if (CR.CaseBB == SwitchBB)
+    visitSwitchCase(CB, SwitchBB);
+  else
+    SwitchCases.push_back(CB);
+
+  return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and its span is less than the machine word bitwidth, encode the case range
+/// into a series of masks and emit bit tests with these masks.
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+                                                   CaseRecVector& WorkList,
+                                                   const Value* SV,
+                                                   MachineBasicBlock* Default,
+                                                   MachineBasicBlock *SwitchBB){
+  EVT PTy = TLI.getPointerTy();
+  unsigned IntPtrBits = PTy.getSizeInBits();
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase = *(CR.Range.second-1);
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // If target does not have legal shift left, do not emit bit tests at all.
+  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+    return false;
+
+  size_t numCmps = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I) {
+    // A single case counts as one compare; a case range counts as two.
+    numCmps += (I->Low == I->High ? 1 : 2);
+  }
+
+  // Count unique destinations
+  SmallSet<MachineBasicBlock*, 4> Dests;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    Dests.insert(I->BB);
+    if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique
+      // destinations.
+      return false;
+  }
+  DEBUG(dbgs() << "Total number of unique destinations: "
+        << Dests.size() << '\n'
+        << "Total number of comparisons: " << numCmps << '\n');
+
+  // Compute span of values.
+  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+  APInt cmpRange = maxValue - minValue;
+
+  DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
+               << "Low bound: " << minValue << '\n'
+               << "High bound: " << maxValue << '\n');
+
+  if (cmpRange.uge(IntPtrBits) ||
+      (!(Dests.size() == 1 && numCmps >= 3) &&
+       !(Dests.size() == 2 && numCmps >= 5) &&
+       !(Dests.size() >= 3 && numCmps >= 6)))
+    return false;
+
+  DEBUG(dbgs() << "Emitting bit tests\n");
+  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+  // Optimize the case where all the case values fit in a
+  // word without having to subtract minValue. In this case,
+  // we can optimize away the subtraction.
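+  // E.g. for cases {1, 3, 8} on a 64-bit target, maxValue is 8 < 64, so the
+  // case values themselves are valid shift amounts and lowBound stays 0.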
+  if (maxValue.ult(IntPtrBits)) {
+    cmpRange = maxValue;
+  } else {
+    lowBound = minValue;
+  }
+
+  CaseBitsVector CasesBits;
+  unsigned i, count = 0;
+
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    MachineBasicBlock* Dest = I->BB;
+    for (i = 0; i < count; ++i)
+      if (Dest == CasesBits[i].BB)
+        break;
+
+    if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+      CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
+      count++;
+    }
+
+    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+    uint64_t lo = (lowValue - lowBound).getZExtValue();
+    uint64_t hi = (highValue - lowBound).getZExtValue();
+    CasesBits[i].ExtraWeight += I->ExtraWeight;
+
+    for (uint64_t j = lo; j <= hi; j++) {
+      CasesBits[i].Mask |= 1ULL << j;
+      CasesBits[i].Bits++;
+    }
+
+  }
+  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+  BitTestInfo BTC;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  DEBUG(dbgs() << "Cases:\n");
+  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+    DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
+                 << ", Bits: " << CasesBits[i].Bits
+                 << ", BB: " << CasesBits[i].BB << '\n');
+
+    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, CaseBB);
+    BTC.push_back(BitTestCase(CasesBits[i].Mask,
+                              CaseBB,
+                              CasesBits[i].BB, CasesBits[i].ExtraWeight));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  BitTestBlock BTB(lowBound, cmpRange, SV,
+                   -1U, MVT::Other, (CR.CaseBB == SwitchBB),
+                   CR.CaseBB, Default, BTC);
+
+  if (CR.CaseBB == SwitchBB)
+    visitBitTestHeader(BTB, SwitchBB);
+
+  BitTestCases.push_back(BTB);
+
+  return true;
+}
+
+/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
+size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+                                       const SwitchInst& SI) {
+
+  /// Use a shorter form of declaration, and also
+  /// show that we want to use CRSBuilder as the Clusterifier.
+  typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
+
+  Clusterifier TheClusterifier;
+
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  // Start with "simple" cases
+  for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+       i != e; ++i) {
+    const BasicBlock *SuccBB = i.getCaseSuccessor();
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
+
+    TheClusterifier.add(i.getCaseValueEx(), SMBB,
+        BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
+  }
+
+  TheClusterifier.optimize();
+
+  size_t numCmps = 0;
+  for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
+       e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
+    Clusterifier::Cluster &C = *i;
+    // Update edge weight for the cluster.
+    unsigned W = C.first.Weight;
+
+    // FIXME: Currently work with ConstantInt based numbers.
+    // Changing it to be APInt based is too heavy for this commit.
+    Cases.push_back(Case(C.first.getLow().toConstantInt(),
+                         C.first.getHigh().toConstantInt(), C.second, W));
+
+    if (C.first.getLow() != C.first.getHigh())
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
+
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+                                           MachineBasicBlock *Last) {
+  // Update JTCases.
+  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+    if (JTCases[i].first.HeaderBB == First)
+      JTCases[i].first.HeaderBB = Last;
+
+  // Update BitTestCases.
+  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+    if (BitTestCases[i].Parent == First)
+      BitTestCases[i].Parent = Last;
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+  // If there is only the default destination, branch to it if it is not the
+  // next basic block. Otherwise, just fall through.
+  if (!SI.getNumCases()) {
+    // Update machine-CFG edges.
+
+    // If this is not a fall-through branch, emit the branch.
+    SwitchMBB->addSuccessor(Default);
+    if (Default != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(Default)));
+
+    return;
+  }
+
+  // If there are any non-default case statements, create a vector of Cases
+  // representing each one, and sort the vector so that we can efficiently
+  // create a binary search tree from them.
+  CaseVector Cases;
+  size_t numCmps = Clusterify(Cases, SI);
+  DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+               << ". Total compares: " << numCmps << '\n');
+  (void)numCmps;
+
+  // Get the Value to be switched on and default basic blocks, which will be
+  // inserted into CaseBlock records, representing basic blocks in the binary
+  // search tree.
+  const Value *SV = SI.getCondition();
+
+  // Push the initial CaseRec onto the worklist
+  CaseRecVector WorkList;
+  WorkList.push_back(CaseRec(SwitchMBB,0,0,
+                             CaseRange(Cases.begin(),Cases.end())));
+
+  while (!WorkList.empty()) {
+    // Grab a record representing a case range to process off the worklist
+    CaseRec CR = WorkList.back();
+    WorkList.pop_back();
+
+    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+      continue;
+
+    // If the range has few cases (three or fewer) emit a series of specific
+    // tests.
+    if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
+      continue;
+
+    // If the switch has more than N blocks, and is at least 40% dense, and the
+    // target supports indirect branches, then emit a jump table rather than
+    // lowering the switch to a binary tree of conditional branches.
+    // N defaults to 4 and is controlled via TLI.getMinimumJumpTableEntries().
+    if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+      continue;
+
+    // Emit binary tree. We need to pick a pivot, and push left and right ranges
+    // onto the worklist. Leaves are handled via handleSmallSwitchRange() call.
+    handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
+  }
+}
+
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
+
+  // Update machine-CFG edges with unique successors.
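+  // An indirectbr may list the same successor block more than once; the
+  // Done set ensures each CFG edge is added only once.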
+ SmallSet<BasicBlock*, 32> Done; + for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { + BasicBlock *BB = I.getSuccessor(i); + bool Inserted = Done.insert(BB); + if (!Inserted) + continue; + + MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; + addSuccessorWithWeight(IndirectBrMBB, Succ); + } + + DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + getValue(I.getAddress()))); +} + +void SelectionDAGBuilder::visitFSub(const User &I) { + // -0.0 - X --> fneg + Type *Ty = I.getType(); + if (isa<Constant>(I.getOperand(0)) && + I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } + + visitBinary(I, ISD::FSUB); +} + +void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + + // Coerce the shift amount to the right type if we can. + if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { + unsigned ShiftSize = ShiftTy.getSizeInBits(); + unsigned Op2Size = Op2.getValueType().getSizeInBits(); + DebugLoc DL = getCurDebugLoc(); + + // If the operand is smaller than the shift count type, promote it. + if (ShiftSize > Op2Size) + Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); + + // If the operand is larger than the shift count type but the shift + // count type has enough bits to represent any shift value, truncate + // it now. This is a common case and it exposes the truncate to + // optimization early. + else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); + // Otherwise we'll need to temporarily settle for some other convenient + // type. Type legalization will make adjustments once the shiftee is split. + else + Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); + } + + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGBuilder::visitSDiv(const User &I) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + // Turn exact SDivs into multiplications. + // FIXME: This should be in DAGCombiner, but it doesn't have access to the + // exact bit. 
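+  // E.g. "sdiv exact i32 %x, 8" may simply become an arithmetic shift right
+  // by 3, since exactness guarantees there is no remainder to round.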
+ if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && + !isa<ConstantSDNode>(Op1) && + isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) + setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG)); + else + setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(), + Op1, Op2)); +} + +void SelectionDAGBuilder::visitICmp(const User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) + predicate = IC->getPredicate(); + else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode = getICmpCondCode(predicate); + + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); +} + +void SelectionDAGBuilder::visitFCmp(const User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) + predicate = FC->getPredicate(); + else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition = getFCmpCondCode(predicate); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); +} + +void SelectionDAGBuilder::visitSelect(const User &I) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + SmallVector<SDValue, 4> Values(NumValues); + SDValue Cond = getValue(I.getOperand(0)); + SDValue TrueVal = getValue(I.getOperand(1)); + SDValue FalseVal = getValue(I.getOperand(2)); + ISD::NodeType OpCode = Cond.getValueType().isVector() ? + ISD::VSELECT : ISD::SELECT; + + for (unsigned i = 0; i != NumValues; ++i) + Values[i] = DAG.getNode(OpCode, getCurDebugLoc(), + TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), + Cond, + SDValue(TrueVal.getNode(), + TrueVal.getResNo() + i), + SDValue(FalseVal.getNode(), + FalseVal.getResNo() + i)); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); +} + +void SelectionDAGBuilder::visitTrunc(const User &I) { + // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitZExt(const User &I) { + // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // ZExt also can't be a cast to bool for same reason. So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitSExt(const User &I) { + // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // SExt also can't be a cast to bool for same reason. 
So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitFPTrunc(const User &I) { + // FPTrunc is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), + DestVT, N, + DAG.getTargetConstant(0, TLI.getPointerTy()))); +} + +void SelectionDAGBuilder::visitFPExt(const User &I){ + // FPExt is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitFPToUI(const User &I) { + // FPToUI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitFPToSI(const User &I) { + // FPToSI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitUIToFP(const User &I) { + // UIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitSIToFP(const User &I){ + // SIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitPtrToInt(const User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); +} + +void SelectionDAGBuilder::visitIntToPtr(const User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); +} + +void SelectionDAGBuilder::visitBitCast(const User &I) { + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + + // BitCast assures us that source and destination are the same size so this is + // either a BITCAST or a no-op. + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + DestVT, N)); // convert types. + else + setValue(&I, N); // noop cast. 
+}
+
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
+  SDValue InVec = getValue(I.getOperand(0));
+  SDValue InVal = getValue(I.getOperand(1));
+  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                              TLI.getPointerTy(),
+                              getValue(I.getOperand(2)));
+  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()),
+                           InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
+  SDValue InVec = getValue(I.getOperand(0));
+  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                              TLI.getPointerTy(),
+                              getValue(I.getOperand(1)));
+  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// beginning at position Pos and ending at Pos+Size, falls within the
+// specified sequential range [Low, Low+Size), or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+                                unsigned Pos, unsigned Size, int Low) {
+  for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+    if (Mask[i] >= 0 && Mask[i] != Low)
+      return false;
+  return true;
+}
+
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
+  SDValue Src1 = getValue(I.getOperand(0));
+  SDValue Src2 = getValue(I.getOperand(1));
+
+  SmallVector<int, 8> Mask;
+  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+  unsigned MaskNumElts = Mask.size();
+
+  EVT VT = TLI.getValueType(I.getType());
+  EVT SrcVT = Src1.getValueType();
+  unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+  if (SrcNumElts == MaskNumElts) {
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &Mask[0]));
+    return;
+  }
+
+  // Normalize the shuffle vector since mask and vector length don't match.
+  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+    // The mask is longer than the source vectors and is a multiple of the
+    // source vectors' length.  We can use CONCAT_VECTORS to make the vector
+    // lengths match the mask length.
+    if (SrcNumElts*2 == MaskNumElts) {
+      // First check for Src1 in low and Src2 in high.
+      if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+        // The shuffle is concatenating two vectors together.
+        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                                 VT, Src1, Src2));
+        return;
+      }
+      // Then check for Src2 in low and Src1 in high.
+      if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+        // The shuffle is concatenating two vectors together.
+        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                                 VT, Src2, Src1));
+        return;
+      }
+    }
+
+    // Pad both vectors with undefs to make them the same length as the mask.
+    unsigned NumConcat = MaskNumElts / SrcNumElts;
+    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+    SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+    MOps1[0] = Src1;
+    MOps2[0] = Src2;
+
+    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps1[0], NumConcat);
+    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps2[0], NumConcat);
+
+    // Readjust the mask for the new input vector length.
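+    // For instance (illustrative only): with SrcNumElts == 4 and
+    // MaskNumElts == 8, a mask entry of 5 originally named element 1 of
+    // Src2.  After both sources are widened to 8 elements, Src2's elements
+    // start at combined index 8, so the loop below rewrites the entry to
+    //   5 - (4 - 8) == 9,
+    // which again names element 1 of (the widened) Src2.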
+    SmallVector<int, 8> MappedOps;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      if (Idx >= (int)SrcNumElts)
+        Idx -= SrcNumElts - MaskNumElts;
+      MappedOps.push_back(Idx);
+    }
+
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &MappedOps[0]));
+    return;
+  }
+
+  if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle.  The analysis is done by calculating
+    // the range of elements the mask accesses on both vectors.
+    int MinRange[2] = { static_cast<int>(SrcNumElts),
+                        static_cast<int>(SrcNumElts)};
+    int MaxRange[2] = {-1, -1};
+
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      unsigned Input = 0;
+      if (Idx < 0)
+        continue;
+
+      if (Idx >= (int)SrcNumElts) {
+        Input = 1;
+        Idx -= SrcNumElts;
+      }
+      if (Idx > MaxRange[Input])
+        MaxRange[Input] = Idx;
+      if (Idx < MinRange[Input])
+        MinRange[Input] = Idx;
+    }
+
+    // Check if the access is smaller than the vector size and whether we can
+    // find a reasonable extract index.
+    int RangeUse[2] = { -1, -1 };  // 0 = Unused, 1 = Extract, -1 = Can not
+                                   // extract.
+    int StartIdx[2];  // StartIdx to extract from
+    for (unsigned Input = 0; Input < 2; ++Input) {
+      if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
+        RangeUse[Input] = 0; // Unused
+        StartIdx[Input] = 0;
+        continue;
+      }
+
+      // Find a good start index that is a multiple of the mask length.  Then
+      // see if the rest of the elements are in range.
+      StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+      if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+          StartIdx[Input] + MaskNumElts <= SrcNumElts)
+        RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+    }
+
+    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+      setValue(&I, DAG.getUNDEF(VT));  // Vectors are not used.
+      return;
+    }
+    if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
+      // Extract the appropriate subvectors and generate a vector shuffle.
+      for (unsigned Input = 0; Input < 2; ++Input) {
+        SDValue &Src = Input == 0 ? Src1 : Src2;
+        if (RangeUse[Input] == 0)
+          Src = DAG.getUNDEF(VT);
+        else
+          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
+      }
+
+      // Calculate the new mask.
+      SmallVector<int, 8> MappedOps;
+      for (unsigned i = 0; i != MaskNumElts; ++i) {
+        int Idx = Mask[i];
+        if (Idx >= 0) {
+          if (Idx < (int)SrcNumElts)
+            Idx -= StartIdx[0];
+          else
+            Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+        }
+        MappedOps.push_back(Idx);
+      }
+
+      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                        &MappedOps[0]));
+      return;
+    }
+  }
+
+  // We can't use either concat vectors or extract subvectors, so fall back
+  // to replacing the shuffle with per-element extracts and a build vector.
+  EVT EltVT = VT.getVectorElementType();
+  EVT PtrVT = TLI.getPointerTy();
+  SmallVector<SDValue,8> Ops;
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    int Idx = Mask[i];
+    SDValue Res;
+
+    if (Idx < 0) {
+      Res = DAG.getUNDEF(EltVT);
+    } else {
+      SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+      if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
+
+      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                        EltVT, Src, DAG.getConstant(Idx, PtrVT));
+    }
+
+    Ops.push_back(Res);
+  }
+
+  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+                           VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+  const Value *Op0 = I.getOperand(0);
+  const Value *Op1 = I.getOperand(1);
+  Type *AggTy = I.getType();
+  Type *ValTy = Op1->getType();
+  bool IntoUndef = isa<UndefValue>(Op0);
+  bool FromUndef = isa<UndefValue>(Op1);
+
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+  SmallVector<EVT, 4> AggValueVTs;
+  ComputeValueVTs(TLI, AggTy, AggValueVTs);
+  SmallVector<EVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumAggValues = AggValueVTs.size();
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumAggValues);
+
+  SDValue Agg = getValue(Op0);
+  unsigned i = 0;
+  // Copy the beginning value(s) from the original aggregate.
+  for (; i != LinearIndex; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+  // Copy values from the inserted value(s).
+  if (NumValValues) {
+    SDValue Val = getValue(Op1);
+    for (; i != LinearIndex + NumValValues; ++i)
+      Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  }
+  // Copy remaining value(s) from the original aggregate.
+  for (; i != NumAggValues; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&AggValueVTs[0], NumAggValues),
+                           &Values[0], NumAggValues));
+}
+
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+  const Value *Op0 = I.getOperand(0);
+  Type *AggTy = Op0->getType();
+  Type *ValTy = I.getType();
+  bool OutOfUndef = isa<UndefValue>(Op0);
+
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+  SmallVector<EVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumValValues = ValValueVTs.size();
+
+  // Ignore an extractvalue that produces an empty object.
+  if (!NumValValues) {
+    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+    return;
+  }
+
+  SmallVector<SDValue, 4> Values(NumValValues);
+
+  SDValue Agg = getValue(Op0);
+  // Copy out the selected value(s).
+  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+    Values[i - LinearIndex] =
+      OutOfUndef ?
+        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+        SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValValueVTs[0], NumValValues),
+                           &Values[0], NumValValues));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
+  SDValue N = getValue(I.getOperand(0));
+  // Note that the pointer operand may be a vector of pointers.  Take the
+  // scalar element which holds a pointer.
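+  // Worked example (illustrative only): for
+  //   %p = getelementptr { i32, [4 x i64] }* %s, i32 0, i32 1, i32 2
+  // the loop below computes, step by step,
+  //   N = %s                  ; first index 0 adds nothing
+  //   N = N + 8               ; field 1 starts at struct offset 8
+  //   N = N + 2 * 8           ; array index 2, element size 8
+  // i.e. a final address of %s + 24, assuming a typical 64-bit data layout.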
+  Type *Ty = I.getOperand(0)->getType()->getScalarType();
+
+  for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
+       OI != E; ++OI) {
+    const Value *Idx = *OI;
+    if (StructType *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
+      if (Field) {
+        // N = N + Offset
+        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+                        DAG.getConstant(Offset, N.getValueType()));
+      }
+
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+
+      // If this is a constant subscript, handle it quickly.
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->isZero()) continue;
+        uint64_t Offs =
+            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+        SDValue OffsVal;
+        EVT PTy = TLI.getPointerTy();
+        unsigned PtrBits = PTy.getSizeInBits();
+        if (PtrBits < 64)
+          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                                TLI.getPointerTy(),
+                                DAG.getConstant(Offs, MVT::i64));
+        else
+          OffsVal = DAG.getIntPtrConstant(Offs);
+
+        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+                        OffsVal);
+        continue;
+      }
+
+      // N = N + Idx * ElementSize;
+      APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+                                TD->getTypeAllocSize(Ty));
+      SDValue IdxN = getValue(Idx);
+
+      // If the index is smaller or larger than intptr_t, truncate or extend
+      // it.
+      IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
+
+      // If this is a multiply by a power of two, turn it into a shl
+      // immediately.  This is a very common case.
+      if (ElementSize != 1) {
+        if (ElementSize.isPowerOf2()) {
+          unsigned Amt = ElementSize.logBase2();
+          IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+                             N.getValueType(), IdxN,
+                             DAG.getConstant(Amt, IdxN.getValueType()));
+        } else {
+          SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType());
+          IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+                             N.getValueType(), IdxN, Scale);
+        }
+      }
+
+      N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                      N.getValueType(), N, IdxN);
+    }
+  }
+
+  setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
+  // If this is a fixed sized alloca in the entry block of the function,
+  // allocate it statically on the stack.
+  if (FuncInfo.StaticAllocaMap.count(&I))
+    return;   // getValue will auto-populate this.
+
+  Type *Ty = I.getAllocatedType();
+  uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+  unsigned Align =
+    std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
+             I.getAlignment());
+
+  SDValue AllocSize = getValue(I.getArraySize());
+
+  EVT IntPtr = TLI.getPointerTy();
+  if (AllocSize.getValueType() != IntPtr)
+    AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
+                          AllocSize,
+                          DAG.getConstant(TySize, IntPtr));
+
+  // Handle alignment.  If the requested alignment is less than or equal to
+  // the stack alignment, ignore it.  If it is greater than the stack
+  // alignment, we note this in the DYNAMIC_STACKALLOC node.
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+  if (Align <= StackAlign)
+    Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size.
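+  // For instance (illustrative numbers): with StackAlign == 16 and a raw
+  // AllocSize of 40 bytes, the ADD/AND pair below computes
+  //   (40 + 15) & ~15 == 48,
+  // the next multiple of the stack alignment.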
+ AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(StackAlign-1)); + + // Mask out the low bits for alignment purposes. + AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); + + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(), + VTs, Ops, 3); + setValue(&I, DSA); + DAG.setRoot(DSA.getValue(1)); + + // Inform the Frame Information that we have just allocated a variable-sized + // object. + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); +} + +void SelectionDAGBuilder::visitLoad(const LoadInst &I) { + if (I.isAtomic()) + return visitAtomicLoad(I); + + const Value *SV = I.getOperand(0); + SDValue Ptr = getValue(SV); + + Type *Ty = I.getType(); + + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; + bool isInvariant = I.getMetadata("invariant.load") != 0; + unsigned Alignment = I.getAlignment(); + const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + SDValue Root; + bool ConstantMemory = false; + if (I.isVolatile() || NumValues > MaxParallelChains) + // Serialize volatile loads with other side effects. + Root = getRoot(); + else if (AA->pointsToConstantMemory( + AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. + Root = DAG.getRoot(); + } + + SmallVector<SDValue, 4> Values(NumValues); + SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), + NumValues)); + EVT PtrVT = Ptr.getValueType(); + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // Serializing loads here may result in excessive register pressure, and + // TokenFactor places arbitrary choke points on the scheduler. SD scheduling + // could recover a bit by hoisting nodes upward in the chain by recognizing + // they are side-effect free or do not alias. The optimizer should really + // avoid this case by converting large object/array copies to llvm.memcpy + // (MaxParallelChains should always remain as failsafe). 
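+  // For instance (illustrative numbers): with MaxParallelChains == 64 and a
+  // first-class aggregate that expands to 200 scalar loads, the flush below
+  // fires after every 64 loads, token-factoring their chains into a new Root
+  // so no single TokenFactor collects hundreds of operands.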
+ if (ChainI == MaxParallelChains) { + assert(PendingLoads.empty() && "PendingLoads must be serialized first"); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + Root = Chain; + ChainI = 0; + } + SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, + A, MachinePointerInfo(SV, Offsets[i]), isVolatile, + isNonTemporal, isInvariant, Alignment, TBAAInfo, + Ranges); + + Values[i] = L; + Chains[ChainI] = L.getValue(1); + } + + if (!ConstantMemory) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + if (isVolatile) + DAG.setRoot(Chain); + else + PendingLoads.push_back(Chain); + } + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); +} + +void SelectionDAGBuilder::visitStore(const StoreInst &I) { + if (I.isAtomic()) + return visitAtomicStore(I); + + const Value *SrcV = I.getOperand(0); + const Value *PtrV = I.getOperand(1); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + // Get the lowered operands. Note that we do this after + // checking if NumResults is zero, because with zero results + // the operands won't have values in the map. + SDValue Src = getValue(SrcV); + SDValue Ptr = getValue(PtrV); + + SDValue Root = getRoot(); + SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), + NumValues)); + EVT PtrVT = Ptr.getValueType(); + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; + unsigned Alignment = I.getAlignment(); + const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // See visitLoad comments. 
+ if (ChainI == MaxParallelChains) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + Root = Chain; + ChainI = 0; + } + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)); + SDValue St = DAG.getStore(Root, getCurDebugLoc(), + SDValue(Src.getNode(), Src.getResNo() + i), + Add, MachinePointerInfo(PtrV, Offsets[i]), + isVolatile, isNonTemporal, Alignment, TBAAInfo); + Chains[ChainI] = St; + } + + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + ++SDNodeOrder; + AssignOrderingToNode(StoreNode.getNode()); + DAG.setRoot(StoreNode); +} + +static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, + SynchronizationScope Scope, + bool Before, DebugLoc dl, + SelectionDAG &DAG, + const TargetLowering &TLI) { + // Fence, if necessary + if (Before) { + if (Order == AcquireRelease || Order == SequentiallyConsistent) + Order = Release; + else if (Order == Acquire || Order == Monotonic) + return Chain; + } else { + if (Order == AcquireRelease) + Order = Acquire; + else if (Order == Release || Order == Monotonic) + return Chain; + } + SDValue Ops[3]; + Ops[0] = Chain; + Ops[1] = DAG.getConstant(Order, TLI.getPointerTy()); + Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy()); + return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3); +} + +void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { + DebugLoc dl = getCurDebugLoc(); + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + if (TLI.getInsertFencesForAtomic()) + InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + DAG, TLI); + + SDValue L = + DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, + getValue(I.getCompareOperand()).getValueType().getSimpleVT(), + InChain, + getValue(I.getPointerOperand()), + getValue(I.getCompareOperand()), + getValue(I.getNewValOperand()), + MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, + TLI.getInsertFencesForAtomic() ? 
Monotonic : Order, + Scope); + + SDValue OutChain = L.getValue(1); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + DAG, TLI); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { + DebugLoc dl = getCurDebugLoc(); + ISD::NodeType NT; + switch (I.getOperation()) { + default: llvm_unreachable("Unknown atomicrmw operation"); + case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; + case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; + case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; + case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break; + case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break; + case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break; + case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break; + case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break; + case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; + case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break; + case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; + } + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + if (TLI.getInsertFencesForAtomic()) + InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + DAG, TLI); + + SDValue L = + DAG.getAtomic(NT, dl, + getValue(I.getValOperand()).getValueType().getSimpleVT(), + InChain, + getValue(I.getPointerOperand()), + getValue(I.getValOperand()), + I.getPointerOperand(), 0 /* Alignment */, + TLI.getInsertFencesForAtomic() ? Monotonic : Order, + Scope); + + SDValue OutChain = L.getValue(1); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + DAG, TLI); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitFence(const FenceInst &I) { + DebugLoc dl = getCurDebugLoc(); + SDValue Ops[3]; + Ops[0] = getRoot(); + Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); +} + +void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { + DebugLoc dl = getCurDebugLoc(); + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + EVT VT = TLI.getValueType(I.getType()); + + if (I.getAlignment() < VT.getSizeInBits() / 8) + report_fatal_error("Cannot generate unaligned atomic load"); + + SDValue L = + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, + getValue(I.getPointerOperand()), + I.getPointerOperand(), I.getAlignment(), + TLI.getInsertFencesForAtomic() ? 
Monotonic : Order,
+                  Scope);
+
+  SDValue OutChain = L.getValue(1);
+
+  if (TLI.getInsertFencesForAtomic())
+    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+                                    DAG, TLI);
+
+  setValue(&I, L);
+  DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+  DebugLoc dl = getCurDebugLoc();
+
+  AtomicOrdering Order = I.getOrdering();
+  SynchronizationScope Scope = I.getSynchScope();
+
+  SDValue InChain = getRoot();
+
+  EVT VT = TLI.getValueType(I.getValueOperand()->getType());
+
+  if (I.getAlignment() < VT.getSizeInBits() / 8)
+    report_fatal_error("Cannot generate unaligned atomic store");
+
+  if (TLI.getInsertFencesForAtomic())
+    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+                                   DAG, TLI);
+
+  SDValue OutChain =
+    DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
+                  InChain,
+                  getValue(I.getPointerOperand()),
+                  getValue(I.getValueOperand()),
+                  I.getPointerOperand(), I.getAlignment(),
+                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
+                  Scope);
+
+  if (TLI.getInsertFencesForAtomic())
+    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+                                    DAG, TLI);
+
+  DAG.setRoot(OutChain);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+                                               unsigned Intrinsic) {
+  bool HasChain = !I.doesNotAccessMemory();
+  bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+  // Build the operand list.
+  SmallVector<SDValue, 8> Ops;
+  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
+    if (OnlyLoad) {
+      // We don't need to serialize loads against other loads.
+      Ops.push_back(DAG.getRoot());
+    } else {
+      Ops.push_back(getRoot());
+    }
+  }
+
+  // Info is set by getTgtMemIntrinsic.
+  TargetLowering::IntrinsicInfo Info;
+  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+  // Add the intrinsic ID as an integer operand if it's not a target memory
+  // intrinsic, or if it is one whose node kind (INTRINSIC_VOID or
+  // INTRINSIC_W_CHAIN) still expects the ID operand.
+  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+      Info.opc == ISD::INTRINSIC_W_CHAIN)
+    Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
+
+  // Add all operands of the call to the operand list.
+  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+    SDValue Op = getValue(I.getArgOperand(i));
+    Ops.push_back(Op);
+  }
+
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+
+  if (HasChain)
+    ValueVTs.push_back(MVT::Other);
+
+  SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+
+  // Create the node.
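+  // For instance (illustrative only): an intrinsic that never touches memory
+  // becomes INTRINSIC_WO_CHAIN; one that has a chain and returns a value
+  // becomes INTRINSIC_W_CHAIN; a void intrinsic with side effects becomes
+  // INTRINSIC_VOID, unless getTgtMemIntrinsic asked for a target memory
+  // intrinsic node instead.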
+  SDValue Result;
+  if (IsTgtIntrinsic) {
+    // This is a target intrinsic that touches memory.
+    Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+                                     VTs, &Ops[0], Ops.size(),
+                                     Info.memVT,
+                                   MachinePointerInfo(Info.ptrVal, Info.offset),
+                                     Info.align, Info.vol,
+                                     Info.readMem, Info.writeMem);
+  } else if (!HasChain) {
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  } else if (!I.getType()->isVoidTy()) {
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  } else {
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  }
+
+  if (HasChain) {
+    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+    if (OnlyLoad)
+      PendingLoads.push_back(Chain);
+    else
+      DAG.setRoot(Chain);
+  }
+
+  if (!I.getType()->isVoidTy()) {
+    if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+      EVT VT = TLI.getValueType(PTy);
+      Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
+    }
+
+    setValue(&I, Result);
+  } else {
+    // Assign an order to the result here. If the intrinsic does not produce a
+    // result, it won't be mapped to an SDNode and visit() will not assign it
+    // an order number.
+    ++SDNodeOrder;
+    AssignOrderingToNode(Result.getNode());
+  }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with an exponent of 1:
+///
+///   Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the i32 bit pattern of the floating-point value.
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x007fffff, MVT::i32));
+  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+                           DAG.getConstant(0x3f800000, MVT::i32));
+  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the i32 bit pattern of the floating-point value.
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+            DebugLoc dl) {
+  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x7f800000, MVT::i32));
+  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+                           DAG.getConstant(23, TLI.getPointerTy()));
+  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+                           DAG.getConstant(127, MVT::i32));
+  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get a 32-bit floating-point constant.
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)),
+                           MVT::f32);
+}
+
+/// expandExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
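+///
+/// Worked example (illustrative numbers): exp(1.0f) is computed as
+/// 2^(1.0 * log2(e)) = 2^1.4427, so IntegerPartOfX == 1 and the polynomial
+/// below approximates 2^0.4427 ~= 1.3591; adding (1 << 23) to that float's
+/// bit pattern doubles it, giving ~2.7183 ~= e.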
+static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OFe 1.4426950f + // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x3fb8aa3b)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + SDValue TwoToFracPartOfX; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // 0.000107046256 error, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + } + + 
// Add the exponent into the result in integer domain. + SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, + t13, IntegerPartOfX)); + } + + // No special expansion. + return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); +} + +/// expandLog - Lower a log intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + + // Scale the exponent by log(2) [0.69314718f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3f317218)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + SDValue LogOfMantissa; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // LogofMantissa = + // -1.1609546f + + // (1.4034025f - 0.23903021f * x) * x; + // + // error 0.0034276066, which is better than 8 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbe74c456)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3fb3a2b1)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f949a29)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // LogOfMantissa = + // -1.7417939f + + // (2.8212026f + + // (-1.4699568f + + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; + // + // error 0.000061011436, which is 14 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbd67b6d6)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ee4f4b8)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fbc278b)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40348e95)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fdef31a)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // LogOfMantissa = + // -2.1072184f + + // (4.2372794f + + // (-3.7029485f + + // (2.2781945f + + // (-0.87823314f + + // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; + // + // error 0.0000023660568, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbc91e5ac)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e4350aa)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f60d3e3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x4011cdf0)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x406cfd1c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, 
MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x408797cb)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4006dcab)); + } + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); + } + + // No special expansion. + return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); +} + +/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + + // Get the exponent. + SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + // Different possible minimax approximations of significand in + // floating-point for various degrees of accuracy over [1,2]. + SDValue Log2ofMantissa; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; + // + // error 0.0049451742, which is more than 7 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbeb08fe0)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x40019463)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fd6633d)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log2ofMantissa = + // -2.51285454f + + // (4.07009056f + + // (-2.12067489f + + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; + // + // error 0.0000876136000, which is better than 13 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbda7262e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f25280b)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x4007b923)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40823e2f)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x4020d29c)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log2ofMantissa = + // -3.0400495f + + // (6.1129976f + + // (-5.3420409f + + // (3.2865683f + + // (-1.2669343f + + // (0.27515199f - + // 0.25691327e-1f * x) * x) * x) * x) * x) * x; + // + // error 0.0000018516, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbcd2769e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e8ce0b9)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fa22ae7)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40525723)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + 
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x40aaf200)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x40c39dad)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4042902c)); + } + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); + } + + // No special expansion. + return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); +} + +/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + + // Scale the exponent by log10(2) [0.30102999f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3e9a209a)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + SDValue Log10ofMantissa; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log10ofMantissa = + // -0.50419619f + + // (0.60948995f - 0.10380950f * x) * x; + // + // error 0.0014886165, which is 6 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbdd49a13)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f1c0789)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f011300)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log10ofMantissa = + // -0.64831180f + + // (0.91751397f + + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; + // + // error 0.00019228036, which is better than 12 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3d431f31)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ea21fb2)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f6ae232)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f25f7c3)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log10ofMantissa = + // -0.84299375f + + // (1.5327582f + + // (-1.0688956f + + // (0.49102474f + + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; + // + // error 0.0000037995730, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3c5d51ce)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e00685a)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3efb6798)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f88d192)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + 
getF32Constant(DAG, 0x3fc4316c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3f57ce70)); + } + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); + } + + // No special expansion. + return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); +} + +/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + SDValue TwoToFractionalPartOfX; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, 
t10,
+                                getF32Constant(DAG, 0x3f317234));
+      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+      TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+                                           getF32Constant(DAG, 0x3f800000));
+    }
+
+    // Add the exponent into the result in integer domain.
+    SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+                              TwoToFractionalPartOfX);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+                       DAG.getNode(ISD::ADD, dl, MVT::i32,
+                                   t13, IntegerPartOfX));
+  }
+
+  // No special expansion.
+  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
+}
+
+/// expandPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode with x == 10.0f.
+static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS,
+                         SelectionDAG &DAG, const TargetLowering &TLI) {
+  bool IsExp10 = false;
+  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
+      APFloat Ten(10.0f);
+      IsExp10 = LHSC->isExactlyValue(Ten);
+    }
+  }
+
+  if (IsExp10) {
+    // Put the exponent in the right bit position for later addition to the
+    // final result:
+    //
+    //   #define LOG2OF10 3.3219281f
+    //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
+    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
+                             getF32Constant(DAG, 0x40549a78));
+    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+    //   FractionalPartOfX = x - (float)IntegerPartOfX;
+    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+    //   IntegerPartOfX <<= 23;
+    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+                                 DAG.getConstant(23, TLI.getPointerTy()));
+
+    SDValue TwoToFractionalPartOfX;
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   twoToFractionalPartOfX =
+      //     0.997535578f +
+      //       (0.735607626f + 0.252464424f * x) * x;
+      //
+      // error 0.0144103317, which is 6 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3e814304));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f3c50c8));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                                           getF32Constant(DAG, 0x3f7f5e7e));
+    } else if (LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999892986f +
+      //       (0.696457318f +
+      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
+      //
+      // error 0.000107046256, which is 13 to 14 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3da235e3));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3e65b8f3));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f324b07));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                                           getF32Constant(DAG, 0x3f7ff8fd));
+    } else { // LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999999982f +
+      //       (0.693148872f +
+      //         (0.240227044f +
+      //           (0.554906021e-1f +
+      //             (0.961591928e-2f +
+      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+      // error 2.47208000*10^(-7), which is better than 18 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3924b03e));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3ab24b87));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3c1d8c17));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3d634a1d));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                               getF32Constant(DAG, 0x3e75fe14));
+      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+                                getF32Constant(DAG, 0x3f317234));
+      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+      TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+                                           getF32Constant(DAG, 0x3f800000));
+    }
+
+    SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+                              TwoToFractionalPartOfX);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+                       DAG.getNode(ISD::ADD, dl, MVT::i32,
+                                   t13, IntegerPartOfX));
+  }
+
+  // No special expansion.
+  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
+}
+
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
+                          SelectionDAG &DAG) {
+  // If RHS is a constant, we can expand this out to a multiplication tree,
+  // otherwise we end up lowering to a call to __powidf2 (for example).  When
+  // optimizing for size, we only want to do this if the expansion would
+  // produce a small number of multiplies, otherwise we do the full expansion.
+  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+    // Get the exponent as a positive value.
+    unsigned Val = RHSC->getSExtValue();
+    if ((int)Val < 0) Val = -Val;
+
+    // powi(x, 0) -> 1.0
+    if (Val == 0)
+      return DAG.getConstantFP(1.0, LHS.getValueType());
+
+    const Function *F = DAG.getMachineFunction().getFunction();
+    if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                         Attribute::OptimizeForSize) ||
+        // If optimizing for size, don't insert too many multiplies.  This
+        // inserts up to 5 multiplies.
+        CountPopulation_32(Val)+Log2_32(Val) < 7) {
+      // We use the simple binary decomposition method to generate the multiply
+      // sequence.  There are more optimal ways to do this (for example,
+      // powi(x,15) generates one more multiply than it should), but this has
+      // the benefit of being both really simple and much better than a
+      // libcall.
+      SDValue Res;  // Logically starts equal to 1.0
+      SDValue CurSquare = LHS;
+      while (Val) {
+        if (Val & 1) {
+          if (Res.getNode())
+            Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(),
+                              Res, CurSquare);
+          else
+            Res = CurSquare;  // 1.0*CurSquare.
+        }
+
+        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+                                CurSquare, CurSquare);
+        Val >>= 1;
+      }
+
+      // If the original was negative, invert the result, producing 1/(x*x*x).
+      if (RHSC->getSExtValue() < 0)
+        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+                          DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+      return Res;
+    }
+  }
+
+  // Otherwise, expand to a libcall.
+  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
+
+// getTruncatedArgReg - Find the underlying register used for a truncated
+// argument.
+static unsigned getTruncatedArgReg(const SDValue &N) { + if (N.getOpcode() != ISD::TRUNCATE) + return 0; + + const SDValue &Ext = N.getOperand(0); + if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){ + const SDValue &CFR = Ext.getOperand(0); + if (CFR.getOpcode() == ISD::CopyFromReg) + return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); + } + return 0; +} + +/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function +/// argument, create the corresponding DBG_VALUE machine instruction for it now. +/// At the end of instruction selection, they will be inserted to the entry BB. +bool +SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, + int64_t Offset, + const SDValue &N) { + const Argument *Arg = dyn_cast<Argument>(V); + if (!Arg) + return false; + + MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + + // Ignore inlined function arguments here. + DIVariable DV(Variable); + if (DV.isInlinedFnArgument(MF.getFunction())) + return false; + + unsigned Reg = 0; + // Some arguments' frame index is recorded during argument lowering. + Offset = FuncInfo.getArgumentFrameIndex(Arg); + if (Offset) + Reg = TRI->getFrameRegister(MF); + + if (!Reg && N.getNode()) { + if (N.getOpcode() == ISD::CopyFromReg) + Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); + else + Reg = getTruncatedArgReg(N); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned PR = RegInfo.getLiveInPhysReg(Reg); + if (PR) + Reg = PR; + } + } + + if (!Reg) { + // Check if ValueMap has reg number. + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) + Reg = VMI->second; + } + + if (!Reg && N.getNode()) { + // Check if frame index is available. + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) + if (FrameIndexSDNode *FINode = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) { + Reg = TRI->getFrameRegister(MF); + Offset = FINode->getIndex(); + } + } + + if (!Reg) + return false; + + MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE)) + .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); + FuncInfo.ArgDbgValues.push_back(&*MIB); + return true; +} + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc +#endif + +/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If +/// we want to emit this as a call to a named external function, return the name +/// otherwise lower it and return null. +const char * +SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { + DebugLoc dl = getCurDebugLoc(); + SDValue Res; + + switch (Intrinsic) { + default: + // By default, turn this into a target intrinsic node. 
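+    // visitTargetIntrinsic builds an INTRINSIC_WO_CHAIN, INTRINSIC_W_CHAIN or
+    // INTRINSIC_VOID node, which the target then lowers itself.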
+ visitTargetIntrinsic(I, Intrinsic); + return 0; + case Intrinsic::vastart: visitVAStart(I); return 0; + case Intrinsic::vaend: visitVAEnd(I); return 0; + case Intrinsic::vacopy: visitVACopy(I); return 0; + case Intrinsic::returnaddress: + setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::frameaddress: + setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::setjmp: + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; + case Intrinsic::longjmp: + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; + case Intrinsic::memcpy: { + // Assert for address < 256 since we support only user defined address + // spaces. + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() + < 256 && + cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() + < 256 && + "Unknown address space"); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)))); + return 0; + } + case Intrinsic::memset: { + // Assert for address < 256 since we support only user defined address + // spaces. + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() + < 256 && + "Unknown address space"); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, + MachinePointerInfo(I.getArgOperand(0)))); + return 0; + } + case Intrinsic::memmove: { + // Assert for address < 256 since we support only user defined address + // spaces. + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() + < 256 && + cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() + < 256 && + "Unknown address space"); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)))); + return 0; + } + case Intrinsic::dbg_declare: { + const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + MDNode *Variable = DI.getVariable(); + const Value *Address = DI.getAddress(); + if (!Address || !DIVariable(Variable).Verify()) { + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + return 0; + } + + // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder + // but do not always have a corresponding SDNode built. 
The SDNodeOrder + // absolute, but not relative, values are different depending on whether + // debug info exists. + ++SDNodeOrder; + + // Check if address has undef value. + if (isa<UndefValue>(Address) || + (Address->use_empty() && !isa<Argument>(Address))) { + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + return 0; + } + + SDValue &N = NodeMap[Address]; + if (!N.getNode() && isa<Argument>(Address)) + // Check unused arguments map. + N = UnusedArgNodeMap[Address]; + SDDbgValue *SDV; + if (N.getNode()) { + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + // Parameters are handled specially. + bool isParameter = + (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || + isa<Argument>(Address)); + + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); + + if (isParameter && !AI) { + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (FINode) + // Byval parameter. We have a frame index at this point. + SDV = DAG.getDbgValue(Variable, FINode->getIndex(), + 0, dl, SDNodeOrder); + else { + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, 0, N); + return 0; + } + } else if (AI) + SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), + 0, dl, SDNodeOrder); + else { + // Can't do anything with other non-AI cases yet. + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); + DEBUG(Address->dump()); + return 0; + } + DAG.AddDbgValue(SDV, N.getNode(), isParameter); + } else { + // If Address is an argument then try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) { + // If variable is pinned by a alloca in dominating bb then + // use StaticAllocaMap. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { + if (AI->getParent() != DI.getParent()) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + SDV = DAG.getDbgValue(Variable, SI->second, + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + return 0; + } + } + } + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + } + } + return 0; + } + case Intrinsic::dbg_value: { + const DbgValueInst &DI = cast<DbgValueInst>(I); + if (!DIVariable(DI.getVariable()).Verify()) + return 0; + + MDNode *Variable = DI.getVariable(); + uint64_t Offset = DI.getOffset(); + const Value *V = DI.getValue(); + if (!V) + return 0; + + // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder + // but do not always have a corresponding SDNode built. The SDNodeOrder + // absolute, but not relative, values are different depending on whether + // debug info exists. + ++SDNodeOrder; + SDDbgValue *SDV; + if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { + SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + } else { + // Do not use getValue() in here; we don't want to generate code at + // this point if it hasn't been done yet. + SDValue N = NodeMap[V]; + if (!N.getNode() && isa<Argument>(V)) + // Check unused arguments map. 
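+      // (UnusedArgNodeMap keeps SDValues for arguments that are otherwise
+      // unused in the function but still need debug info.)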
+ N = UnusedArgNodeMap[V]; + if (N.getNode()) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) { + SDV = DAG.getDbgValue(Variable, N.getNode(), + N.getResNo(), Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); + } + } else if (!V->use_empty() ) { + // Do not call getValue(V) yet, as we don't want to generate code. + // Remember it for later. + DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); + DanglingDebugInfoMap[V] = DDI; + } else { + // We may expand this to cover more cases. One case where we have no + // data available is an unreferenced parameter. + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + } + } + + // Build a debug info table entry. + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + V = BCI->getOperand(0); + const AllocaInst *AI = dyn_cast<AllocaInst>(V); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) { + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); + return 0; + } + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) + return 0; // VLAs. + int FI = SI->second; + + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) + MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); + return 0; + } + + case Intrinsic::eh_typeid_for: { + // Find the type id for the given typeinfo. + GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); + unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); + Res = DAG.getConstant(TypeID, MVT::i32); + setValue(&I, Res); + return 0; + } + + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + DAG.getMachineFunction().getMMI().setCallsEHReturn(true); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + MVT::Other, + getControlRoot(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return 0; + case Intrinsic::eh_unwind_init: + DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); + return 0; + case Intrinsic::eh_dwarf_cfa: { + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl, + TLI.getPointerTy()); + SDValue Offset = DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, + TLI.getPointerTy()), + CfaArg); + SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl, + TLI.getPointerTy(), + DAG.getConstant(0, TLI.getPointerTy())); + setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), + FA, Offset)); + return 0; + } + case Intrinsic::eh_sjlj_callsite: { + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); + assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); + assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); + + MMI.setCurrentCallSite(CI->getZExtValue()); + return 0; + } + case Intrinsic::eh_sjlj_functioncontext: { + // Get and store the index of the function context. 
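+    // SjLj exception handling keeps its setjmp buffer in a function-context
+    // alloca; recording the frame index lets later passes locate it.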
+    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    AllocaInst *FnCtx =
+      cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+    int FI = FuncInfo.StaticAllocaMap[FnCtx];
+    MFI->setFunctionContextIndex(FI);
+    return 0;
+  }
+  case Intrinsic::eh_sjlj_setjmp: {
+    SDValue Ops[2];
+    Ops[0] = getRoot();
+    Ops[1] = getValue(I.getArgOperand(0));
+    SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl,
+                             DAG.getVTList(MVT::i32, MVT::Other),
+                             Ops, 2);
+    setValue(&I, Op.getValue(0));
+    DAG.setRoot(Op.getValue(1));
+    return 0;
+  }
+  case Intrinsic::eh_sjlj_longjmp: {
+    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+                            getRoot(), getValue(I.getArgOperand(0))));
+    return 0;
+  }
+
+  case Intrinsic::x86_mmx_pslli_w:
+  case Intrinsic::x86_mmx_pslli_d:
+  case Intrinsic::x86_mmx_pslli_q:
+  case Intrinsic::x86_mmx_psrli_w:
+  case Intrinsic::x86_mmx_psrli_d:
+  case Intrinsic::x86_mmx_psrli_q:
+  case Intrinsic::x86_mmx_psrai_w:
+  case Intrinsic::x86_mmx_psrai_d: {
+    SDValue ShAmt = getValue(I.getArgOperand(1));
+    if (isa<ConstantSDNode>(ShAmt)) {
+      visitTargetIntrinsic(I, Intrinsic);
+      return 0;
+    }
+    unsigned NewIntrinsic = 0;
+    EVT ShAmtVT = MVT::v2i32;
+    switch (Intrinsic) {
+    case Intrinsic::x86_mmx_pslli_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+      break;
+    case Intrinsic::x86_mmx_pslli_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+      break;
+    case Intrinsic::x86_mmx_pslli_q:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+      break;
+    case Intrinsic::x86_mmx_psrli_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+      break;
+    case Intrinsic::x86_mmx_psrli_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+      break;
+    case Intrinsic::x86_mmx_psrli_q:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+      break;
+    case Intrinsic::x86_mmx_psrai_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+      break;
+    case Intrinsic::x86_mmx_psrai_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+      break;
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
+    }
+
+    // The vector shift intrinsics with scalars use 32-bit shift amounts but
+    // the sse2/mmx shift instructions read 64 bits. Set the upper 32 bits
+    // to be zero.
+    // We must do this early because v2i32 is not a legal type.
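+    // For illustration, a variable 32-bit shift amount $n becomes
+    //   ShAmt = (x86mmx) bitcast (build_vector $n, 0)
+    // so only the low 32 bits of the 64-bit operand are set.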
+ SDValue ShOps[2]; + ShOps[0] = ShAmt; + ShOps[1] = DAG.getConstant(0, MVT::i32); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + EVT DestVT = TLI.getValueType(I.getType()); + ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt); + Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + DAG.getConstant(NewIntrinsic, MVT::i32), + getValue(I.getArgOperand(0)), ShAmt); + setValue(&I, Res); + return 0; + } + case Intrinsic::x86_avx_vinsertf128_pd_256: + case Intrinsic::x86_avx_vinsertf128_ps_256: + case Intrinsic::x86_avx_vinsertf128_si_256: + case Intrinsic::x86_avx2_vinserti128: { + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * + ElVT.getVectorNumElements(); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getIntPtrConstant(Idx)); + setValue(&I, Res); + return 0; + } + case Intrinsic::x86_avx_vextractf128_pd_256: + case Intrinsic::x86_avx_vextractf128_ps_256: + case Intrinsic::x86_avx_vextractf128_si_256: + case Intrinsic::x86_avx2_vextracti128: { + EVT DestVT = TLI.getValueType(I.getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * + DestVT.getVectorNumElements(); + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + DAG.getIntPtrConstant(Idx)); + setValue(&I, Res); + return 0; + } + case Intrinsic::convertff: + case Intrinsic::convertfsi: + case Intrinsic::convertfui: + case Intrinsic::convertsif: + case Intrinsic::convertuif: + case Intrinsic::convertss: + case Intrinsic::convertsu: + case Intrinsic::convertus: + case Intrinsic::convertuu: { + ISD::CvtCode Code = ISD::CVT_INVALID; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
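+    // Map each convert.* intrinsic to its CVT_* code; the remaining two
+    // arguments (rounding mode and saturation behaviour) are passed through
+    // to getConvertRndSat below unchanged.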
+ case Intrinsic::convertff: Code = ISD::CVT_FF; break; + case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; + case Intrinsic::convertfui: Code = ISD::CVT_FU; break; + case Intrinsic::convertsif: Code = ISD::CVT_SF; break; + case Intrinsic::convertuif: Code = ISD::CVT_UF; break; + case Intrinsic::convertss: Code = ISD::CVT_SS; break; + case Intrinsic::convertsu: Code = ISD::CVT_SU; break; + case Intrinsic::convertus: Code = ISD::CVT_US; break; + case Intrinsic::convertuu: Code = ISD::CVT_UU; break; + } + EVT DestVT = TLI.getValueType(I.getType()); + const Value *Op1 = I.getArgOperand(0); + Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1), + DAG.getValueType(DestVT), + DAG.getValueType(getValue(Op1).getValueType()), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), + Code); + setValue(&I, Res); + return 0; + } + case Intrinsic::powi: + setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG)); + return 0; + case Intrinsic::log: + setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + return 0; + case Intrinsic::log2: + setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + return 0; + case Intrinsic::log10: + setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + return 0; + case Intrinsic::exp: + setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + return 0; + case Intrinsic::exp2: + setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + return 0; + case Intrinsic::pow: + setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG, TLI)); + return 0; + case Intrinsic::sqrt: + case Intrinsic::fabs: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: { + unsigned Opcode; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::fabs: Opcode = ISD::FABS; break; + case Intrinsic::sin: Opcode = ISD::FSIN; break; + case Intrinsic::cos: Opcode = ISD::FCOS; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + } + + setValue(&I, DAG.getNode(Opcode, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; + } + case Intrinsic::fma: + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + return 0; + case Intrinsic::fmuladd: { + EVT VT = TLI.getValueType(I.getType()); + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && + TLI.isFMAFasterThanMulAndAdd(VT)){ + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + } else { + SDValue Mul = DAG.getNode(ISD::FMUL, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); + SDValue Add = DAG.getNode(ISD::FADD, dl, + getValue(I.getArgOperand(0)).getValueType(), + Mul, + getValue(I.getArgOperand(2))); + setValue(&I, Add); + } + return 0; + } + case Intrinsic::convert_to_fp16: + setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, + MVT::i16, getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::convert_from_fp16: + setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl, + MVT::f32, getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::pcmarker: { + SDValue Tmp = getValue(I.getArgOperand(0)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); + return 0; + } + case Intrinsic::readcyclecounter: { + SDValue Op = getRoot(); + Res = DAG.getNode(ISD::READCYCLECOUNTER, dl, + DAG.getVTList(MVT::i64, MVT::Other), + &Op, 1); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return 0; + } + case Intrinsic::bswap: + setValue(&I, DAG.getNode(ISD::BSWAP, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::cttz: { + SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, + dl, Ty, Arg)); + return 0; + } + case Intrinsic::ctlz: { + SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, + dl, Ty, Arg)); + return 0; + } + case Intrinsic::ctpop: { + SDValue Arg = getValue(I.getArgOperand(0)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); + return 0; + } + case Intrinsic::stacksave: { + SDValue Op = getRoot(); + Res = DAG.getNode(ISD::STACKSAVE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return 0; + } + case Intrinsic::stackrestore: { + Res = getValue(I.getArgOperand(0)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); + return 0; + } + case Intrinsic::stackprotector: { + // Emit code into the DAG to store the stack guard onto the stack. + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + EVT PtrTy = TLI.getPointerTy(); + + SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. + AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); + + int FI = FuncInfo.StaticAllocaMap[Slot]; + MFI->setStackProtectorIndex(FI); + + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + + // Store the stack protector onto the stack. + Res = DAG.getStore(getRoot(), dl, Src, FIN, + MachinePointerInfo::getFixedStack(FI), + true, false, 0); + setValue(&I, Res); + DAG.setRoot(Res); + return 0; + } + case Intrinsic::objectsize: { + // If we don't know by now, we're never going to know. + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); + + assert(CI && "Non-constant type in __builtin_object_size?"); + + SDValue Arg = getValue(I.getCalledValue()); + EVT Ty = Arg.getValueType(); + + if (CI->isZero()) + Res = DAG.getConstant(-1ULL, Ty); + else + Res = DAG.getConstant(0, Ty); + + setValue(&I, Res); + return 0; + } + case Intrinsic::var_annotation: + // Discard annotate attributes + return 0; + + case Intrinsic::init_trampoline: { + const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); + + SDValue Ops[6]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); + Ops[5] = DAG.getSrcValue(F); + + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6); + + DAG.setRoot(Res); + return 0; + } + case Intrinsic::adjust_trampoline: { + setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl, + TLI.getPointerTy(), + getValue(I.getArgOperand(0)))); + return 0; + } + case Intrinsic::gcroot: + if (GFI) { + const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); + const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); + + FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); + GFI->addStackRoot(FI->getIndex(), TypeMap); + } + return 0; + case Intrinsic::gcread: + case Intrinsic::gcwrite: + llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); + case Intrinsic::flt_rounds: + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); + return 0; + + case Intrinsic::expect: { + // Just replace __builtin_expect(exp, c) with EXP. + setValue(&I, getValue(I.getArgOperand(0))); + return 0; + } + + case Intrinsic::debugtrap: + case Intrinsic::trap: { + StringRef TrapFuncName = TM.Options.getTrapFunctionName(); + if (TrapFuncName.empty()) { + ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 
+ ISD::TRAP : ISD::DEBUGTRAP; + DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot())); + return 0; + } + TargetLowering::ArgListTy Args; + TargetLowering:: + CallLoweringInfo CLI(getRoot(), I.getType(), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), + Args, DAG, dl); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + DAG.setRoot(Result.second); + return 0; + } + + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: { + ISD::NodeType Op; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; + case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; + case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; + case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; + case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; + } + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2)); + return 0; + } + case Intrinsic::prefetch: { + SDValue Ops[5]; + unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + Ops[0] = getRoot(); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = getValue(I.getArgOperand(3)); + DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, + DAG.getVTList(MVT::Other), + &Ops[0], 5, + EVT::getIntegerVT(*Context, 8), + MachinePointerInfo(I.getArgOperand(0)), + 0, /* align */ + false, /* volatile */ + rw==0, /* read */ + rw==1)); /* write */ + return 0; + } + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: { + bool IsStart = (Intrinsic == Intrinsic::lifetime_start); + // Stack coloring is not enabled in O0, discard region information. + if (TM.getOptLevel() == CodeGenOpt::None) + return 0; + + SmallVector<Value *, 4> Allocas; + GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); + + for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { + AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); + + // Could not find an Alloca. + if (!LifetimeObject) + continue; + + int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; + + SDValue Ops[2]; + Ops[0] = getRoot(); + Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); + + Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); + DAG.setRoot(Res); + } + return 0; + } + case Intrinsic::invariant_start: + // Discard region information. + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return 0; + case Intrinsic::invariant_end: + // Discard region information. 
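+    // Like invariant.start, this only carries information for the optimizers;
+    // no code needs to be emitted for it.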
+ return 0; + case Intrinsic::donothing: + // ignore + return 0; + } +} + +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, + bool isTailCall, + MachineBasicBlock *LandingPad) { + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + MCSymbol *BeginLabel = 0; + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); + + // Check whether the function can return without sret-demotion. + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI); + + bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); + + SDValue DemoteStackSlot; + int DemoteStackIdx = -100; + + if (!CanLowerReturn) { + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize( + FTy->getReturnType()); + unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment( + FTy->getReturnType()); + MachineFunction &MF = DAG.getMachineFunction(); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); + + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.Alignment = Align; + Args.push_back(Entry); + RetTy = Type::getVoidTy(FTy->getContext()); + } + + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + const Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; Entry.Ty = V->getType(); + + unsigned attrInd = i - CS.arg_begin() + 1; + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.Alignment = CS.getParamAlignment(attrInd); + Args.push_back(Entry); + } + + if (LandingPad) { + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. + BeginLabel = MMI.getContext().CreateTempSymbol(); + + // For SjLj, keep track of which landing pads go with which invokes + // so as to maintain the ordering of pads in the LSDA. + unsigned CallSiteIndex = MMI.getCurrentCallSite(); + if (CallSiteIndex) { + MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); + + // Now that the call site is handled, stop tracking it. + MMI.setCurrentCallSite(0); + } + + // Both PendingLoads and PendingExports must be flushed here; + // this call might not return. + (void)getRoot(); + DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel)); + } + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI.LowerCallTo. 
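+  // For example, a call is not in tail call position if its result has uses
+  // other than the return instruction that immediately follows it.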
+ if (isTailCall && !isInTailCallPosition(CS, TLI)) + isTailCall = false; + + TargetLowering:: + CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, + getCurDebugLoc(), CS); + std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI); + assert((isTailCall || Result.second.getNode()) && + "Non-null chain expected with non-tail call!"); + assert((Result.second.getNode() || !Result.first.getNode()) && + "Null value expected with tail call!"); + if (Result.first.getNode()) { + setValue(CS.getInstruction(), Result.first); + } else if (!CanLowerReturn && Result.second.getNode()) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector<EVT, 1> PVTs; + Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); + + ComputeValueVTs(TLI, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + + SmallVector<EVT, 4> RetTys; + SmallVector<uint64_t, 4> Offsets; + RetTy = FTy->getReturnType(); + ComputeValueVTs(TLI, RetTy, RetTys, &Offsets); + + unsigned NumValues = RetTys.size(); + SmallVector<SDValue, 4> Values(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, + DemoteStackSlot, + DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), + false, false, false, 1); + Values[i] = L; + Chains[i] = L.getValue(1); + } + + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + PendingLoads.push_back(Chain); + + setValue(CS.getInstruction(), + DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&RetTys[0], RetTys.size()), + &Values[0], Values.size())); + } + + // Assign order to nodes here. If the call does not produce a result, it won't + // be mapped to a SDNode and visit() will not assign it an order number. + if (!Result.second.getNode()) { + // As a special case, a null chain means that a tail call has been emitted and + // the DAG root is already updated. + HasTailCall = true; + ++SDNodeOrder; + AssignOrderingToNode(DAG.getRoot().getNode()); + } else { + DAG.setRoot(Result.second); + ++SDNodeOrder; + AssignOrderingToNode(Result.second.getNode()); + } + + if (LandingPad) { + // Insert a label at the end of the invoke call to mark the try range. This + // can be used to detect deletion of the invoke via the MachineModuleInfo. + MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); + DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel)); + + // Inform MachineModuleInfo of range. + MMI.addInvoke(LandingPad, BeginLabel, EndLabel); + } +} + +/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. +static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality()) + if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, + Type *LoadTy, + SelectionDAGBuilder &Builder) { + + // Check to see if this load can be trivially constant folded, e.g. 
if the + // input is from a string literal. + if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { + // Cast pointer to the type we really want to load. + LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), + PointerType::getUnqual(LoadTy)); + + if (const Constant *LoadCst = + ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), + Builder.TD)) + return Builder.getValue(LoadCst); + } + + // Otherwise, we have to emit the load. If the pointer is to unfoldable but + // still constant memory, the input chain can be the entry node. + SDValue Root; + bool ConstantMemory = false; + + // Do not serialize (non-volatile) loads of constant memory with anything. + if (Builder.AA->pointsToConstantMemory(PtrVal)) { + Root = Builder.DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. + Root = Builder.DAG.getRoot(); + } + + SDValue Ptr = Builder.getValue(PtrVal); + SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, + Ptr, MachinePointerInfo(PtrVal), + false /*volatile*/, + false /*nontemporal*/, + false /*isinvariant*/, 1 /* align=1 */); + + if (!ConstantMemory) + Builder.PendingLoads.push_back(LoadVal.getValue(1)); + return LoadVal; +} + + +/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { + // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) + if (I.getNumArgOperands() != 3) + return false; + + const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); + if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || + !I.getArgOperand(2)->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) + return false; + + const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2)); + + // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 + // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 + if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { + bool ActuallyDoIt = true; + MVT LoadVT; + Type *LoadTy; + switch (Size->getZExtValue()) { + default: + LoadVT = MVT::Other; + LoadTy = 0; + ActuallyDoIt = false; + break; + case 2: + LoadVT = MVT::i16; + LoadTy = Type::getInt16Ty(Size->getContext()); + break; + case 4: + LoadVT = MVT::i32; + LoadTy = Type::getInt32Ty(Size->getContext()); + break; + case 8: + LoadVT = MVT::i64; + LoadTy = Type::getInt64Ty(Size->getContext()); + break; + /* + case 16: + LoadVT = MVT::v4i32; + LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = VectorType::get(LoadTy, 4); + break; + */ + } + + // This turns into unaligned loads. We only do this if the target natively + // supports the MVT we'll be loading or if it is small enough (<= 4) that + // we'll only produce a small number of byte loads. + + // Require that we can find a legal MVT, and only do this if the target + // supports unaligned loads of that type. Expanding into byte loads would + // bloat the code. + if (ActuallyDoIt && Size->getZExtValue() > 4) { + // TODO: Handle 5 byte compare as 4-byte + 1 byte. + // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. 
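+      // (On a 32-bit target MVT::i64 is not legal, so the 8-byte case falls
+      // back to the memcmp libcall rather than expanding into byte loads.)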
+ if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) + ActuallyDoIt = false; + } + + if (ActuallyDoIt) { + SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); + SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); + + SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, + ISD::SETNE); + EVT CallVT = TLI.getValueType(I.getType(), true); + setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); + return true; + } + } + + + return false; +} + +/// visitUnaryFloatCall - If a call instruction is a unary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a unary floating-point call. + if (I.getNumArgOperands() != 1 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); + return true; +} + +void SelectionDAGBuilder::visitCall(const CallInst &I) { + // Handle inline assembly differently. + if (isa<InlineAsm>(I.getCalledValue())) { + visitInlineAsm(&I); + return; + } + + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + ComputeUsesVAFloatArgument(I, &MMI); + + const char *RenameFn = 0; + if (Function *F = I.getCalledFunction()) { + if (F->isDeclaration()) { + if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { + if (unsigned IID = II->getIntrinsicID(F)) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + if (unsigned IID = F->getIntrinsicID()) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + + // Check for well-known libc/libm calls. If the function is internal, it + // can't be a library call. + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + switch (Func) { + default: break; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: + if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.getType() == I.getArgOperand(1)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue LHS = getValue(I.getArgOperand(0));
+          SDValue RHS = getValue(I.getArgOperand(1));
+          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+                                   LHS.getValueType(), LHS, RHS));
+          return;
+        }
+        break;
+      case LibFunc::fabs:
+      case LibFunc::fabsf:
+      case LibFunc::fabsl:
+        if (visitUnaryFloatCall(I, ISD::FABS))
+          return;
+        break;
+      case LibFunc::sin:
+      case LibFunc::sinf:
+      case LibFunc::sinl:
+        if (visitUnaryFloatCall(I, ISD::FSIN))
+          return;
+        break;
+      case LibFunc::cos:
+      case LibFunc::cosf:
+      case LibFunc::cosl:
+        if (visitUnaryFloatCall(I, ISD::FCOS))
+          return;
+        break;
+      case LibFunc::sqrt:
+      case LibFunc::sqrtf:
+      case LibFunc::sqrtl:
+        if (visitUnaryFloatCall(I, ISD::FSQRT))
+          return;
+        break;
+      case LibFunc::floor:
+      case LibFunc::floorf:
+      case LibFunc::floorl:
+        if (visitUnaryFloatCall(I, ISD::FFLOOR))
+          return;
+        break;
+      case LibFunc::nearbyint:
+      case LibFunc::nearbyintf:
+      case LibFunc::nearbyintl:
+        if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
+          return;
+        break;
+      case LibFunc::ceil:
+      case LibFunc::ceilf:
+      case LibFunc::ceill:
+        if (visitUnaryFloatCall(I, ISD::FCEIL))
+          return;
+        break;
+      case LibFunc::rint:
+      case LibFunc::rintf:
+      case LibFunc::rintl:
+        if (visitUnaryFloatCall(I, ISD::FRINT))
+          return;
+        break;
+      case LibFunc::trunc:
+      case LibFunc::truncf:
+      case LibFunc::truncl:
+        if (visitUnaryFloatCall(I, ISD::FTRUNC))
+          return;
+        break;
+      case LibFunc::log2:
+      case LibFunc::log2f:
+      case LibFunc::log2l:
+        if (visitUnaryFloatCall(I, ISD::FLOG2))
+          return;
+        break;
+      case LibFunc::exp2:
+      case LibFunc::exp2f:
+      case LibFunc::exp2l:
+        if (visitUnaryFloatCall(I, ISD::FEXP2))
+          return;
+        break;
+      case LibFunc::memcmp:
+        if (visitMemCmpCall(I))
+          return;
+        break;
+      }
+    }
+  }
+
+  SDValue Callee;
+  if (!RenameFn)
+    Callee = getValue(I.getCalledValue());
+  else
+    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  // Check if we can potentially perform a tail call. More detailed checking
+  // will be done within LowerCallTo, after more information about the call is
+  // known.
+  LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+namespace {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+  /// CallOperand - If this is the result output operand or a clobber
+  /// this is null, otherwise it is the incoming operand to the CallInst.
+  /// This gets modified as the asm is processed.
+  SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+
+  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+  }
+
+  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+  /// corresponds to. If there is no Value* for this operand, it returns
+  /// MVT::Other.
+  EVT getCallOperandValEVT(LLVMContext &Context,
+                           const TargetLowering &TLI,
+                           const DataLayout *TD) const {
+    if (CallOperandVal == 0) return MVT::Other;
+
+    if (isa<BasicBlock>(CallOperandVal))
+      return TLI.getPointerTy();
+
+    llvm::Type *OpTy = CallOperandVal->getType();
+
+    // FIXME: code duplicated from TargetLowering::ParseConstraints().
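+    // For illustration: an indirect "=*m" operand of type i32* is reported
+    // as i32 (the pointee type), and a { <16 x i8> } struct operand is
+    // unwrapped and reported as v16i8.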
+ // If this is an indirect operand, the operand is a pointer to the + // accessed type. + if (isIndirect) { + llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } + + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. + if (StructType *STy = dyn_cast<StructType>(OpTy)) + if (STy->getNumElements() == 1) + OpTy = STy->getElementType(0); + + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. + if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = TD->getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpTy = IntegerType::get(Context, BitSize); + break; + } + } + + return TLI.getValueType(OpTy, true); + } +}; + +typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; + +} // end anonymous namespace + +/// GetRegistersForValue - Assign registers (virtual or physical) for the +/// specified operand. We prefer to assign virtual registers, to allow the +/// register allocator to handle the assignment process. However, if the asm +/// uses features that we can't model on machineinstrs, we have SDISel do the +/// allocation. This produces generally horrible, but correct, code. +/// +/// OpInfo describes the operand. +/// +static void GetRegistersForValue(SelectionDAG &DAG, + const TargetLowering &TLI, + DebugLoc DL, + SDISelAsmOperandInfo &OpInfo) { + LLVMContext &Context = *DAG.getContext(); + + MachineFunction &MF = DAG.getMachineFunction(); + SmallVector<unsigned, 4> Regs; + + // If this is a constraint for a single physreg, or a constraint for a + // register class, find it. + std::pair<unsigned, const TargetRegisterClass*> PhysReg = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + + unsigned NumRegs = 1; + if (OpInfo.ConstraintVT != MVT::Other) { + // If this is a FP input in an integer register (or visa versa) insert a bit + // cast of the input value. More generally, handle any case where the input + // value disagrees with the register class we plan to stick this in. + if (OpInfo.Type == InlineAsm::isInput && + PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { + // Try to convert to the first EVT that the reg class contains. If the + // types are identical size, use a bitcast to convert (e.g. two differing + // vector types). + MVT RegVT = *PhysReg.second->vt_begin(); + if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, + RegVT, OpInfo.CallOperand); + OpInfo.ConstraintVT = RegVT; + } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { + // If the input is a FP value and we want it in FP registers, do a + // bitcast to the corresponding integer type. This turns an f64 value + // into i64, which can be passed with two i32 values on a 32-bit + // machine. + RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, + RegVT, OpInfo.CallOperand); + OpInfo.ConstraintVT = RegVT; + } + } + + NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); + } + + MVT RegVT; + EVT ValueVT = OpInfo.ConstraintVT; + + // If this is a constraint for a specific physical register, like {r17}, + // assign it now. 
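+  // When the value needs NumRegs > 1 registers, the requested register and
+  // the registers that follow it in its class are claimed as a group.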
+ if (unsigned AssignedReg = PhysReg.first) { + const TargetRegisterClass *RC = PhysReg.second; + if (OpInfo.ConstraintVT == MVT::Other) + ValueVT = *RC->vt_begin(); + + // Get the actual register value type. This is important, because the user + // may have asked for (e.g.) the AX register in i32 type. We need to + // remember that AX is actually i16 to get the right extension. + RegVT = *RC->vt_begin(); + + // This is a explicit reference to a physical register. + Regs.push_back(AssignedReg); + + // If this is an expanded reference, add the rest of the regs to Regs. + if (NumRegs != 1) { + TargetRegisterClass::iterator I = RC->begin(); + for (; *I != AssignedReg; ++I) + assert(I != RC->end() && "Didn't find reg!"); + + // Already added the first reg. + --NumRegs; ++I; + for (; NumRegs; --NumRegs, ++I) { + assert(I != RC->end() && "Ran out of registers to allocate!"); + Regs.push_back(*I); + } + } + + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); + return; + } + + // Otherwise, if this was a reference to an LLVM register class, create vregs + // for this reference. + if (const TargetRegisterClass *RC = PhysReg.second) { + RegVT = *RC->vt_begin(); + if (OpInfo.ConstraintVT == MVT::Other) + ValueVT = RegVT; + + // Create the appropriate number of virtual registers. + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (; NumRegs; --NumRegs) + Regs.push_back(RegInfo.createVirtualRegister(RC)); + + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); + return; + } + + // Otherwise, we couldn't allocate enough registers for this. +} + +/// visitInlineAsm - Handle a call to an InlineAsm object. +/// +void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + + /// ConstraintOperands - Information about all of the constraints. + SDISelAsmOperandInfoVector ConstraintOperands; + + TargetLowering::AsmOperandInfoVector + TargetConstraints = TLI.ParseConstraints(CS); + + bool hasMemory = false; + + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); + SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); + + MVT OpVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpVT = TLI.getSimpleValueType(CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + // If this is an input or an indirect output, process the call argument. + // BasicBlocks are labels, currently appearing only in asm's. 
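+    // A BasicBlock operand (an asm label) lowers to its MachineBasicBlock;
+    // everything else is lowered to an SDValue with getValue().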
+ if (OpInfo.CallOperandVal) { + if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); + } else { + OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); + } + + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD). + getSimpleVT(); + } + + OpInfo.ConstraintVT = OpVT; + + // Indirect operand accesses access memory. + if (OpInfo.isIndirect) + hasMemory = true; + else { + for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType + CType = TLI.getConstraintType(OpInfo.Codes[j]); + if (CType == TargetLowering::C_Memory) { + hasMemory = true; + break; + } + } + } + } + + SDValue Chain, Flag; + + // We won't need to flush pending loads if this asm doesn't touch + // memory and is nonvolatile. + if (hasMemory || IA->hasSideEffects()) + Chain = getRoot(); + else + Chain = DAG.getRoot(); + + // Second pass over the constraints: compute which constraint option to use + // and assign registers to constraints that want a specific physreg. + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + // If this is an output operand with a matching input operand, look up the + // matching input. If their types mismatch, e.g. one is an integer, the + // other is floating point, or their sizes are different, flag it as an + // error. + if (OpInfo.hasMatchingInput()) { + SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (MatchRC.second != InputRC.second)) { + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + Input.ConstraintVT = OpInfo.ConstraintVT; + } + } + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.Type == InlineAsm::isClobber) + continue; + + // If this is a memory input, and if the operand is not indirect, do what we + // need to to provide an address for the memory input. + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + !OpInfo.isIndirect) { + assert((OpInfo.isMultipleAlternative || + (OpInfo.Type == InlineAsm::isInput)) && + "Can only indirectify direct input operands!"); + + // Memory operands really want the address of the value. If we don't have + // an indirect input, put it in the constpool if we can, otherwise spill + // it to a stack slot. + // TODO: This isn't quite right. We need to handle these according to + // the addressing mode that the constraint wants. Also, this may take + // an additional register for the computation and we don't want that + // either. + + // If the operand is a float, integer, or vector constant, spill to a + // constant pool entry to get its address. 
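+      // Non-constant values are instead spilled to a stack slot below, and
+      // the slot's address becomes the operand.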
+      const Value *OpVal = OpInfo.CallOperandVal;
+      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+          isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
+        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+                                                 TLI.getPointerTy());
+      } else {
+        // Otherwise, create a stack slot and emit a store to it before the
+        // asm.
+        Type *Ty = OpVal->getType();
+        uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+        unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+        SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Chain = DAG.getStore(Chain, getCurDebugLoc(),
+                             OpInfo.CallOperand, StackSlot,
+                             MachinePointerInfo::getFixedStack(SSFI),
+                             false, false, 0);
+        OpInfo.CallOperand = StackSlot;
+      }
+
+      // There is no longer a Value* corresponding to this operand.
+      OpInfo.CallOperandVal = 0;
+
+      // It is now an indirect operand.
+      OpInfo.isIndirect = true;
+    }
+
+    // If this constraint is for a specific register, allocate it before
+    // anything else.
+    if (OpInfo.ConstraintType == TargetLowering::C_Register)
+      GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
+  }
+
+  // Third pass - Loop over all of the operands, assigning virtual or physregs
+  // to register class operands.
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // C_Register operands have already been allocated, Other/Memory don't need
+    // to be.
+    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+      GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
+  }
+
+  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+  std::vector<SDValue> AsmNodeOperands;
+  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
+  AsmNodeOperands.push_back(
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+                                      TLI.getPointerTy()));
+
+  // If we have a !srcloc metadata node associated with it, we want to attach
+  // this to the ultimately generated inline asm machineinstr. To do this, we
+  // pass in the third operand as this (potentially null) inline asm MDNode.
+  const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+  AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+  // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+  // bits as operand 3.
+  unsigned ExtraInfo = 0;
+  if (IA->hasSideEffects())
+    ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+  if (IA->isAlignStack())
+    ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+  // Set the asm dialect.
+  ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+  // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+    // Ideally, we would only check against memory constraints. However, the
+    // meaning of an "other" constraint can be target-specific and we can't
+    // easily reason about it. Therefore, be conservative and set
+    // MayLoad/MayStore for other constraints as well.
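+    // e.g. an "m" or "X" input sets Extra_MayLoad, such an output sets
+    // Extra_MayStore, and a clobber conservatively sets both.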
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.Type == InlineAsm::isInput) + ExtraInfo |= InlineAsm::Extra_MayLoad; + else if (OpInfo.Type == InlineAsm::isOutput) + ExtraInfo |= InlineAsm::Extra_MayStore; + else if (OpInfo.Type == InlineAsm::isClobber) + ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); + } + } + + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, + TLI.getPointerTy())); + + // Loop over all of the inputs, copying the operand values into the + // appropriate registers and processing the output regs. + RegsForValue RetValRegs; + + // IndirectStoresToEmit - The set of stores to emit after the inline asm node. + std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; + + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + switch (OpInfo.Type) { + case InlineAsm::isOutput: { + if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && + OpInfo.ConstraintType != TargetLowering::C_Register) { + // Memory output, or 'other' output (e.g. 'X' constraint). + assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + + // Add information to the INLINEASM node to know about this output. + unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, + TLI.getPointerTy())); + AsmNodeOperands.push_back(OpInfo.CallOperand); + break; + } + + // Otherwise, this is a register or register class output. + + // Copy the output from the appropriate register. Find a register that + // we can use. + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } + + // If this is an indirect operand, store through the pointer after the + // asm. + if (OpInfo.isIndirect) { + IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, + OpInfo.CallOperandVal)); + } else { + // This is the result value of the call. + assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); + // Concatenate this output onto the outputs list. + RetValRegs.append(OpInfo.AssignedRegs); + } + + // Add information to the INLINEASM node to know that this register is + // set. + OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? + InlineAsm::Kind_RegDefEarlyClobber : + InlineAsm::Kind_RegDef, + false, + 0, + DAG, + AsmNodeOperands); + break; + } + case InlineAsm::isInput: { + SDValue InOperandVal = OpInfo.CallOperand; + + if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? + // If this is required to match an output register we have already set, + // just use its register. + unsigned OperandNo = OpInfo.getMatchedOperand(); + + // Scan until we find the definition we already emitted of this operand. + // When we find it, create a RegsForValue operand. + unsigned CurOp = InlineAsm::Op_FirstOperand; + for (; OperandNo; --OperandNo) { + // Advance to the next operand. 
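+ // Sketch of the operand-list layout being walked here (see InlineAsm.h
+ // for the authoritative encoding): each group is a flag word followed
+ // by its operands,
+ //   [flag][op0]...[opN-1][flag][op0]...
+ // so CurOp advances by getNumOperandRegisters(flag) + 1 per group.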
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+ }
+
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if (InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ if (OpInfo.isIndirect) {
+ // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
+ " don't know how to handle tied "
+ "indirect register inputs");
+ report_fatal_error("Cannot handle indirect register inputs!");
+ }
+
+ RegsForValue MatchedRegs;
+ MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+ MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
+ MatchedRegs.RegVTs.push_back(RegVT);
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+ i != e; ++i)
+ MatchedRegs.Regs.push_back
+ (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+ // Use the produced MatchedRegs object to copy the input value into the
+ // freshly-created virtual registers.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+ Chain, &Flag, CS.getInstruction());
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+ true, OpInfo.getMatchedOperand(),
+ DAG, AsmNodeOperands);
+ break;
+ }
+
+ assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+ assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+ OpInfo.getMatchedOperand());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+ TLI.getPointerTy()));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+
+ // Treat indirect 'X' constraint as memory.
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
+ OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ Ops, DAG);
+ if (Ops.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ TLI.getPointerTy()));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+ "Memory operands expect pointer values");
+
+ // Add information to the INLINEASM node to know about this input.
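+ // The flag word packs the operand kind and count (roughly
+ // Kind | NumOps << 3, consistent with the "(OpFlag&0xffff)>>3"
+ // arithmetic above), so Kind_Mem with one operand becomes a single
+ // target constant here.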
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + TLI.getPointerTy())); + AsmNodeOperands.push_back(InOperandVal); + break; + } + + assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || + OpInfo.ConstraintType == TargetLowering::C_Register) && + "Unknown constraint type!"); + + // TODO: Support this. + if (OpInfo.isIndirect) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "Don't know how to handle indirect register inputs yet " + "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); + break; + } + + // Copy the input into the appropriate registers. + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + Chain, &Flag, CS.getInstruction()); + + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, + DAG, AsmNodeOperands); + break; + } + case InlineAsm::isClobber: { + // Add the clobbered value to the operand list, so that the register + // allocator is aware that the physreg got clobbered. + if (!OpInfo.AssignedRegs.Regs.empty()) + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, + false, 0, DAG, + AsmNodeOperands); + break; + } + } + } + + // Finish up input operands. Set the input chain and add the flag last. + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + if (Flag.getNode()) AsmNodeOperands.push_back(Flag); + + Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), + DAG.getVTList(MVT::Other, MVT::Glue), + &AsmNodeOperands[0], AsmNodeOperands.size()); + Flag = Chain.getValue(1); + + // If this asm returns a register value, copy the result from that register + // and set it as the value of the call. + if (!RetValRegs.Regs.empty()) { + SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), + Chain, &Flag, CS.getInstruction()); + + // FIXME: Why don't we do this for inline asms with MRVs? + if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { + EVT ResultType = TLI.getValueType(CS.getType()); + + // If any of the results of the inline asm is a vector, it may have the + // wrong width/num elts. This can happen for register classes that can + // contain multiple different value types. The preg or vreg allocated may + // not have the same VT as was expected. Convert it to the right type + // with bit_convert. + if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { + Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + ResultType, Val); + + } else if (ResultType != Val.getValueType() && + ResultType.isInteger() && Val.getValueType().isInteger()) { + // If a result value was tied to an input value, the computed result may + // have a wider width than the expected result. Extract the relevant + // portion. + Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val); + } + + assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); + } + + setValue(CS.getInstruction(), Val); + // Don't need to use this as a chain in this case. + if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) + return; + } + + std::vector<std::pair<SDValue, const Value *> > StoresToEmit; + + // Process indirect outputs, first output all of the flagged copies out of + // physregs. 
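+ // E.g. (an illustrative case) for asm("..." : "=r"(*p)) the result is
+ // flag-copied out of its physreg by the loop below and then stored
+ // through p by the non-flagged stores that follow.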
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { + RegsForValue &OutRegs = IndirectStoresToEmit[i].first; + const Value *Ptr = IndirectStoresToEmit[i].second; + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), + Chain, &Flag, IA); + StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); + } + + // Emit the non-flagged stores from the physregs. + SmallVector<SDValue, 8> OutChains; + for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { + SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), + StoresToEmit[i].first, + getValue(StoresToEmit[i].second), + MachinePointerInfo(StoresToEmit[i].second), + false, false, 0); + OutChains.push_back(Val); + } + + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &OutChains[0], OutChains.size()); + + DAG.setRoot(Chain); +} + +void SelectionDAGBuilder::visitVAStart(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); +} + +void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { + const DataLayout &TD = *TLI.getDataLayout(); + SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), + getRoot(), getValue(I.getOperand(0)), + DAG.getSrcValue(I.getOperand(0)), + TD.getABITypeAlignment(I.getType())); + setValue(&I, V); + DAG.setRoot(V.getValue(1)); +} + +void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); +} + +void SelectionDAGBuilder::visitVACopy(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getSrcValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(1)))); +} + +/// TargetLowering::LowerCallTo - This is the default LowerCallTo +/// implementation, which just calls LowerCall. +/// FIXME: When all targets are +/// migrated to using LowerCall, this hook should be integrated into SDISel. +std::pair<SDValue, SDValue> +TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { + // Handle all of the outgoing arguments. + CLI.Outs.clear(); + CLI.OutVals.clear(); + ArgListTy &Args = CLI.Args; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*this, Args[i].Ty, ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); + SDValue Op = SDValue(Args[i].Node.getNode(), + Args[i].Node.getResNo() + Value); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + getDataLayout()->getABITypeAlignment(ArgTy); + + if (Args[i].isZExt) + Flags.setZExt(); + if (Args[i].isSExt) + Flags.setSExt(); + if (Args[i].isInReg) + Flags.setInReg(); + if (Args[i].isSRet) + Flags.setSRet(); + if (Args[i].isByVal) { + Flags.setByVal(); + PointerType *Ty = cast<PointerType>(Args[i].Ty); + Type *ElementTy = Ty->getElementType(); + Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); + // For ByVal, alignment should come from FE. BE will guess if this + // info is not there but there are cases it cannot get right. 
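+ // Sketch: for a byval struct argument the front end normally records the
+ // alignment on the call site; when Args[i].Alignment is 0, the target's
+ // getByValTypeAlignment guess is used instead.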
+ unsigned FrameAlign; + if (Args[i].Alignment) + FrameAlign = Args[i].Alignment; + else + FrameAlign = getByValTypeAlignment(ElementTy); + Flags.setByValAlign(FrameAlign); + } + if (Args[i].isNest) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); + SmallVector<SDValue, 4> Parts(NumParts); + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (Args[i].isSExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (Args[i].isZExt) + ExtendKind = ISD::ZERO_EXTEND; + + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, + PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); + + for (unsigned j = 0; j != NumParts; ++j) { + // if it isn't first piece, alignment must be 1 + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + i < CLI.NumFixedArgs, + i, j*Parts[j].getValueType().getStoreSize()); + if (NumParts > 1 && j == 0) + MyFlags.Flags.setSplit(); + else if (j != 0) + MyFlags.Flags.setOrigAlign(1); + + CLI.Outs.push_back(MyFlags); + CLI.OutVals.push_back(Parts[j]); + } + } + } + + // Handle the incoming return values from the call. + CLI.Ins.clear(); + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(*this, CLI.RetTy, RetTys); + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + + SmallVector<SDValue, 4> InVals; + CLI.Chain = LowerCall(CLI, InVals); + + // Verify that the target's LowerCall behaved as expected. + assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && + "LowerCall didn't return a valid chain!"); + assert((!CLI.IsTailCall || InVals.empty()) && + "LowerCall emitted a return value for a tail call!"); + assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && + "LowerCall didn't emit the correct number of values!"); + + // For a tail call, the return value is merely live-out and there aren't + // any nodes in the DAG representing it. Return a special value to + // indicate that a tail call has been emitted and no more Instructions + // should be processed in the current block. + if (CLI.IsTailCall) { + CLI.DAG.setRoot(CLI.Chain); + return std::make_pair(SDValue(), SDValue()); + } + + DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerCall emitted a null value!"); + assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && + "LowerCall emitted a value with the wrong type!"); + }); + + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. 
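+ // For example (a target-dependent sketch): an i128 return on a 64-bit
+ // target typically comes back as two i64 parts, and getCopyFromParts
+ // below reassembles them into the single i128 value the IR expects.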
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (CLI.RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (CLI.RetZExt)
+ AssertOp = ISD::AssertZext;
+ SmallVector<SDValue, 4> ReturnValues;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+
+ ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+ NumRegs, RegisterVT, VT, NULL,
+ AssertOp));
+ CurReg += NumRegs;
+ }
+
+ // For a function returning void, there is no return value. We can't create
+ // such a node, so we just return a null return value in that case; nothing
+ // will actually look at the value anyway.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), CLI.Chain);
+
+ SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
+ CLI.DAG.getVTList(&RetTys[0], RetTys.size()),
+ &ReturnValues[0], ReturnValues.size());
+ return std::make_pair(Res, CLI.Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+ if (Res.getNode())
+ Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("LowerOperation not implemented for this target!");
+}
+
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+ SDValue Op = getNonRegisterValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+ RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. Uses by switches do not count as entry-block
+/// uses, since a switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ if (FastISel)
+ return A->use_empty();
+
+ const BasicBlock *Entry = A->getParent()->begin();
+ for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+ return false; // Use not in entry block.
+ }
+ return true;
+}
+
+void SelectionDAGISel::LowerArguments(const Function &F) {
+ SelectionDAG &DAG = SDB->DAG;
+ DebugLoc dl = SDB->getCurDebugLoc();
+ const DataLayout *TD = TLI.getDataLayout();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ if (!FuncInfo->CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+ // NOTE: Assuming that a pointer will never break down to more than one VT
+ // or one register.
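+ // Illustrative note: on common targets the sret pointer is a single
+ // GPR-sized value, so the one InputArg built below, with the pointer's
+ // register type, is sufficient.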
+ ISD::ArgFlagsTy Flags; + Flags.setSRet(); + MVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); + ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0); + Ins.push_back(RetArg); + } + + // Set up the incoming argument description vector. + unsigned Idx = 1; + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I, ++Idx) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I->getType(), ValueVTs); + bool isArgValueUsed = !I->use_empty(); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + TD->getABITypeAlignment(ArgTy); + + if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + Flags.setZExt(); + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + Flags.setSExt(); + if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) + Flags.setInReg(); + if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) + Flags.setSRet(); + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { + Flags.setByVal(); + PointerType *Ty = cast<PointerType>(I->getType()); + Type *ElementTy = Ty->getElementType(); + Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. + unsigned FrameAlign; + if (F.getParamAlignment(Idx)) + FrameAlign = F.getParamAlignment(Idx); + else + FrameAlign = TLI.getByValTypeAlignment(ElementTy); + Flags.setByValAlign(FrameAlign); + } + if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + MVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed, + Idx-1, i*RegisterVT.getStoreSize()); + if (NumRegs > 1 && i == 0) + MyFlags.Flags.setSplit(); + // if it isn't first piece, alignment must be 1 + else if (i > 0) + MyFlags.Flags.setOrigAlign(1); + Ins.push_back(MyFlags); + } + } + } + + // Call the target to set up the argument values. + SmallVector<SDValue, 8> InVals; + SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), + F.isVarArg(), Ins, + dl, DAG, InVals); + + // Verify that the target's LowerFormalArguments behaved as expected. + assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && + "LowerFormalArguments didn't return a valid chain!"); + assert(InVals.size() == Ins.size() && + "LowerFormalArguments didn't emit the correct number of values!"); + DEBUG({ + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerFormalArguments emitted a null value!"); + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && + "LowerFormalArguments emitted a value with the wrong type!"); + } + }); + + // Update the DAG with the new chain value resulting from argument lowering. + DAG.setRoot(NewRoot); + + // Set up the argument values. + unsigned i = 0; + Idx = 1; + if (!FuncInfo->CanLowerReturn) { + // Create a virtual register for the sret pointer, and put in a copy + // from the sret argument into it. 
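+ // Sketch of the demoted-return path: the struct return value was
+ // rewritten as a hidden pointer argument, and DemoteRegister (set below)
+ // is how visitRet later finds where to store the result.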
+ SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + MVT VT = ValueVTs[0].getSimpleVT(); + MVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + ISD::NodeType AssertOp = ISD::DELETED_NODE; + SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, + RegVT, VT, NULL, AssertOp); + + MachineFunction& MF = SDB->DAG.getMachineFunction(); + MachineRegisterInfo& RegInfo = MF.getRegInfo(); + unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); + FuncInfo->DemoteRegister = SRetReg; + NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), + SRetReg, ArgValue); + DAG.setRoot(NewRoot); + + // i indexes lowered arguments. Bump it past the hidden sret argument. + // Idx indexes LLVM arguments. Don't touch it. + ++i; + } + + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + ++I, ++Idx) { + SmallVector<SDValue, 4> ArgValues; + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + + // If this argument is unused then remember its value. It is used to generate + // debugging information. + if (I->use_empty() && NumValues) + SDB->setUnusedArgValue(I, InVals[i]); + + for (unsigned Val = 0; Val != NumValues; ++Val) { + EVT VT = ValueVTs[Val]; + MVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); + + if (!I->use_empty()) { + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + AssertOp = ISD::AssertSext; + else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + AssertOp = ISD::AssertZext; + + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], + NumParts, PartVT, VT, + NULL, AssertOp)); + } + + i += NumParts; + } + + // We don't need to do anything else for unused arguments. + if (ArgValues.empty()) + continue; + + // Note down frame index. + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) + FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + + SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, + SDB->getCurDebugLoc()); + + SDB->setValue(I, Res); + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + if (LoadSDNode *LNode = + dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) + FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + } + + // If this argument is live outside of the entry block, insert a copy from + // wherever we got it to the vreg that other BB's will reference it as. + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + // If we can, though, try to skip creating an unnecessary vreg. + // FIXME: This isn't very clean... it would be nice to make this more + // general. It's also subtly incompatible with the hacks FastISel + // uses with vregs. + unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + FuncInfo->ValueMap[I] = Reg; + continue; + } + } + if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { + FuncInfo->InitializeRegForValue(I); + SDB->CopyToExportRegsIfNeeded(I); + } + } + + assert(i == InVals.size() && "Argument register count mismatch!"); + + // Finally, if the target has anything special to do, allow it to do so. + // FIXME: this should insert code into the DAG! 
+ EmitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB)) continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ unsigned Reg;
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo.CreateRegs(C->getType());
+ CopyValueToVirtualRegister(C, RegOut);
+ }
+ Reg = RegOut;
+ } else {
+ DenseMap<const Value *, unsigned>::iterator I =
+ FuncInfo.ValueMap.find(PHIOp);
+ if (I != FuncInfo.ValueMap.end())
+ Reg = I->second;
+ else {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo.CreateRegs(PHIOp->getType());
+ CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+ // Remember that this register needs to be added to the machine PHI node as
+ // the input for this MBB.
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+ ConstantsOut.clear();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 0000000..9188945
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,559 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class DbgValueInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class MDNode;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDDbgValue;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class DataLayout;
+class TargetLibraryInfo;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+///
+class SelectionDAGBuilder {
+ /// CurDebugLoc - current file + line number. Changes as we build the DAG.
+ DebugLoc CurDebugLoc;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+ /// UnusedArgNodeMap - Maps argument values for unused arguments. This is
+ /// used to preserve debug information for incoming arguments.
+ DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+
+ /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ const DbgValueInst* DI;
+ DebugLoc dl;
+ unsigned SDNodeOrder;
+ public:
+ DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
+ DI(di), dl(DL), SDNodeOrder(SDNO) { }
+ const DbgValueInst* getDI() { return DI; }
+ DebugLoc getdl() { return dl; }
+ unsigned getSDNodeOrder() { return SDNodeOrder; }
+ };
+
+ /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+ /// yet seen the referent. We defer handling these until we do see it.
+ DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
+public:
+ /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+ /// them up and then emit token factor nodes when possible. This allows us to
+ /// get simple disambiguation between loads without worrying about alias
+ /// analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+private:
+
+ /// PendingExports - CopyToReg nodes that copy values to virtual registers
+ /// for export to other blocks need to be emitted before any terminator
+ /// instruction, but they have no other ordering requirements. We bunch them
+ /// up and then emit a single tokenfactor for them just before terminator
+ /// instructions.
+ SmallVector<SDValue, 8> PendingExports; + + /// SDNodeOrder - A unique monotonically increasing number used to order the + /// SDNodes we create. + unsigned SDNodeOrder; + + /// Case - A struct to record the Value for a switch case, and the + /// case's target basic block. + struct Case { + const Constant *Low; + const Constant *High; + MachineBasicBlock* BB; + uint32_t ExtraWeight; + + Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } + Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, + uint32_t extraweight) : Low(low), High(high), BB(bb), + ExtraWeight(extraweight) { } + + APInt size() const { + const APInt &rHigh = cast<ConstantInt>(High)->getValue(); + const APInt &rLow = cast<ConstantInt>(Low)->getValue(); + return (rHigh - rLow + 1ULL); + } + }; + + struct CaseBits { + uint64_t Mask; + MachineBasicBlock* BB; + unsigned Bits; + uint32_t ExtraWeight; + + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, + uint32_t Weight): + Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } + }; + + typedef std::vector<Case> CaseVector; + typedef std::vector<CaseBits> CaseBitsVector; + typedef CaseVector::iterator CaseItr; + typedef std::pair<CaseItr, CaseItr> CaseRange; + + /// CaseRec - A struct with ctor used in lowering switches to a binary tree + /// of conditional branches. + struct CaseRec { + CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge, + CaseRange r) : + CaseBB(bb), LT(lt), GE(ge), Range(r) {} + + /// CaseBB - The MBB in which to emit the compare and branch + MachineBasicBlock *CaseBB; + /// LT, GE - If nonzero, we know the current case value must be less-than or + /// greater-than-or-equal-to these Constants. + const Constant *LT; + const Constant *GE; + /// Range - A pair of iterators representing the range of case values to be + /// processed at this point in the binary search tree. + CaseRange Range; + }; + + typedef std::vector<CaseRec> CaseRecVector; + + struct CaseBitsCmp { + bool operator()(const CaseBits &C1, const CaseBits &C2) { + return C1.Bits > C2.Bits; + } + }; + + size_t Clusterify(CaseVector &Cases, const SwitchInst &SI); + + /// CaseBlock - This structure is used to communicate between + /// SelectionDAGBuilder and SDISel for the code generation of additional basic + /// blocks needed by multi-case switch statements. + struct CaseBlock { + CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, + const Value *cmpmiddle, + MachineBasicBlock *truebb, MachineBasicBlock *falsebb, + MachineBasicBlock *me, + uint32_t trueweight = 0, uint32_t falseweight = 0) + : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), + TrueBB(truebb), FalseBB(falsebb), ThisBB(me), + TrueWeight(trueweight), FalseWeight(falseweight) { } + + // CC - the condition code to use for the case block's setcc node + ISD::CondCode CC; + + // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit. + // Emit by default LHS op RHS. MHS is used for range comparisons: + // If MHS is not null: (LHS <= MHS) and (MHS <= RHS). + const Value *CmpLHS, *CmpMHS, *CmpRHS; + + // TrueBB/FalseBB - the block to branch to if the setcc is true/false. + MachineBasicBlock *TrueBB, *FalseBB; + + // ThisBB - the block into which to emit the code for the setcc and branches + MachineBasicBlock *ThisBB; + + // TrueWeight/FalseWeight - branch weights. 
+ uint32_t TrueWeight, FalseWeight;
+ };
+
+ struct JumpTable {
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+ /// Reg - the virtual register containing the index of the jump table entry
+ /// to jump to.
+ unsigned Reg;
+ /// JTI - the JumpTableIndex for this jump table in the function.
+ unsigned JTI;
+ /// MBB - the MBB into which to emit the code for the indirect jump.
+ MachineBasicBlock *MBB;
+ /// Default - the MBB of the default bb, which is a successor of the range
+ /// check MBB. This is used when updating PHI nodes in successors.
+ MachineBasicBlock *Default;
+ };
+ struct JumpTableHeader {
+ JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+ bool E = false):
+ First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+ APInt First;
+ APInt Last;
+ const Value *SValue;
+ MachineBasicBlock *HeaderBB;
+ bool Emitted;
+ };
+ typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+ struct BitTestCase {
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+ uint32_t Weight):
+ Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { }
+ uint64_t Mask;
+ MachineBasicBlock *ThisBB;
+ MachineBasicBlock *TargetBB;
+ uint32_t ExtraWeight;
+ };
+
+ typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+ struct BitTestBlock {
+ BitTestBlock(APInt F, APInt R, const Value* SV,
+ unsigned Rg, MVT RgVT, bool E,
+ MachineBasicBlock* P, MachineBasicBlock* D,
+ const BitTestInfo& C):
+ First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
+ Parent(P), Default(D), Cases(C) { }
+ APInt First;
+ APInt Range;
+ const Value *SValue;
+ unsigned Reg;
+ MVT RegVT;
+ bool Emitted;
+ MachineBasicBlock *Parent;
+ MachineBasicBlock *Default;
+ BitTestInfo Cases;
+ };
+
+public:
+ // TLI - This is information that describes the available target features we
+ // need for lowering. This indicates when operations are unavailable,
+ // implemented with a libcall, etc.
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+ const DataLayout *TD;
+ AliasAnalysis *AA;
+ const TargetLibraryInfo *LibInfo;
+
+ /// SwitchCases - Vector of CaseBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<CaseBlock> SwitchCases;
+ /// JTCases - Vector of JumpTable structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<JumpTableBlock> JTCases;
+ /// BitTestCases - Vector of BitTestBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<BitTestBlock> BitTestCases;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<const Constant *, unsigned> ConstantsOut;
+
+ /// FuncInfo - Information about the function as a whole.
+ ///
+ FunctionLoweringInfo &FuncInfo;
+
+ /// OptLevel - What optimization level we're generating code for.
+ ///
+ CodeGenOpt::Level OptLevel;
+
+ /// GFI - Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
+ DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
+
+ /// HasTailCall - This is set to true if a call in the current
+ /// block has been translated as a tail call. In this case,
+ /// no subsequent DAG nodes should be created.
+ /// + bool HasTailCall; + + LLVMContext *Context; + + SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, + CodeGenOpt::Level ol) + : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + HasTailCall(false) { + } + + void init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li); + + /// clear - Clear out the current SelectionDAG and the associated + /// state and prepare this SelectionDAGBuilder object to be used + /// for a new block. This doesn't clear out information about + /// additional blocks that are needed to complete switch lowering + /// or PHI node updating; that information is cleared out as it is + /// consumed. + void clear(); + + /// clearDanglingDebugInfo - Clear the dangling debug information + /// map. This function is separated from the clear so that debug + /// information that is dangling in a basic block can be properly + /// resolved in a different basic block. This allows the + /// SelectionDAG to resolve dangling debug information attached + /// to PHI nodes. + void clearDanglingDebugInfo(); + + /// getRoot - Return the current virtual root of the Selection DAG, + /// flushing any PendingLoad items. This must be done before emitting + /// a store or any other node that may need to be ordered after any + /// prior load instructions. + /// + SDValue getRoot(); + + /// getControlRoot - Similar to getRoot, but instead of flushing all the + /// PendingLoad items, flush all the PendingExports items. It is necessary + /// to do this before emitting a terminator instruction. + /// + SDValue getControlRoot(); + + DebugLoc getCurDebugLoc() const { return CurDebugLoc; } + + unsigned getSDNodeOrder() const { return SDNodeOrder; } + + void CopyValueToVirtualRegister(const Value *V, unsigned Reg); + + /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten + /// from how the code appeared in the source. The ordering is used by the + /// scheduler to effectively turn off scheduling. + void AssignOrderingToNode(const SDNode *Node); + + void visit(const Instruction &I); + + void visit(unsigned Opcode, const User &I); + + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, + // generate the debug data structures now that we've seen its definition. 
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+ SDValue getValue(const Value *V);
+ SDValue getNonRegisterValue(const Value *V);
+ SDValue getValueImpl(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void setUnusedArgValue(const Value *V, SDValue NewN) {
+ SDValue &N = UnusedArgNodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB, unsigned Opc);
+ void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB);
+ bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(const Value *V);
+ void ExportFromCurrentBlock(const Value *V);
+ void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+ MachineBasicBlock *LandingPad = NULL);
+
+ /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
+private:
+ // Terminator instructions.
+ void visitRet(const ReturnInst &I);
+ void visitBr(const BranchInst &I);
+ void visitSwitch(const SwitchInst &I);
+ void visitIndirectBr(const IndirectBrInst &I);
+ void visitUnreachable(const UnreachableInst &I) { /* noop */ }
+
+ // Helpers for visitSwitch
+ bool handleSmallSwitchRange(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleJTSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleBTSplitSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+ bool handleBitTestsSwitchCase(CaseRec& CR,
+ CaseRecVector& WorkList,
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB);
+
+ uint32_t getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+ void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ uint32_t Weight = 0);
+public:
+ void visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB);
+ void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
+ void visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ uint32_t BranchWeightToNext,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB);
+ void visitJumpTable(JumpTable &JT);
+ void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(const InvokeInst &I);
+ void visitResume(const ResumeInst &I);
+
+ void visitBinary(const User &I, unsigned OpCode);
+ void visitShift(const User &I, unsigned Opcode);
+ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(const User &I);
+ void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+ void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(const User &I);
+ void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+ void visitOr  (const User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(const User &I);
+ void visitFCmp(const User &I);
+ // Visit the conversion instructions
+ void visitTrunc(const User &I);
+ void visitZExt(const User &I);
+ void visitSExt(const User &I);
+ void visitFPTrunc(const User &I);
+ void visitFPExt(const User &I);
+ void visitFPToUI(const User &I);
+ void visitFPToSI(const User &I);
+ void visitUIToFP(const User &I);
+ void visitSIToFP(const User &I);
+ void visitPtrToInt(const User &I);
+ void visitIntToPtr(const User &I);
+ void visitBitCast(const User &I);
+
+ void visitExtractElement(const User &I);
+ void visitInsertElement(const User &I);
+ void visitShuffleVector(const User &I);
+
+ void visitExtractValue(const ExtractValueInst &I);
+ void visitInsertValue(const InsertValueInst &I);
+ void visitLandingPad(const LandingPadInst &I);
+
+ void visitGetElementPtr(const User &I);
+ void visitSelect(const User &I);
+
+ void visitAlloca(const AllocaInst &I);
+ void visitLoad(const LoadInst &I);
+ void visitStore(const StoreInst &I);
+ void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
+ void visitAtomicRMW(const AtomicRMWInst &I);
+ void visitFence(const FenceInst &I);
+ void visitPHI(const PHINode &I);
+ void visitCall(const CallInst &I);
+ bool visitMemCmpCall(const CallInst &I);
+ bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
+ void visitAtomicLoad(const LoadInst &I);
+ void visitAtomicStore(const StoreInst &I);
+
+ void visitInlineAsm(ImmutableCallSite CS);
+ const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+
+ void visitVAStart(const CallInst &I);
+ void visitVAArg(const VAArgInst &I);
+ void visitVAEnd(const CallInst &I);
+ void visitVACopy(const CallInst &I);
+
+ void visitUserOp1(const Instruction &I) {
+ llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+ }
+ void visitUserOp2(const Instruction &I) {
+ llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+ }
+
+ void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ /// EmitFuncArgumentDbgValue - If V is a function argument then create the
+ /// corresponding DBG_VALUE machine instruction for it now. At the end of
+ /// instruction selection, these will be inserted into the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, + int64_t Offset, const SDValue &N); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp new file mode 100644 index 0000000..3b5823b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -0,0 +1,645 @@ +//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::dump method and friends. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->getName(getMachineOpcode()); + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; + } + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_FENCE: return "AtomicFence"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD: return "AtomicLoad"; + case ISD::ATOMIC_STORE: return "AtomicStore"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: 
return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + case ISD::RegisterMask: return "RegisterMask"; + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: return "LSDAADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::TargetIndex: return "TargetIndex"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::BlockAddress: return "BlockAddress"; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstantFP: return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FSINCOS: return "fsincos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return 
"smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + + case ISD::FPOWI: return "fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::SELECT: return "select"; + case ISD::VSELECT: return "vselect"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. 
+  case ISD::SIGN_EXTEND: return "sign_extend";
+  case ISD::ZERO_EXTEND: return "zero_extend";
+  case ISD::ANY_EXTEND: return "any_extend";
+  case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+  case ISD::TRUNCATE: return "truncate";
+  case ISD::FP_ROUND: return "fp_round";
+  case ISD::FLT_ROUNDS_: return "flt_rounds";
+  case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+  case ISD::FP_EXTEND: return "fp_extend";
+
+  case ISD::SINT_TO_FP: return "sint_to_fp";
+  case ISD::UINT_TO_FP: return "uint_to_fp";
+  case ISD::FP_TO_SINT: return "fp_to_sint";
+  case ISD::FP_TO_UINT: return "fp_to_uint";
+  case ISD::BITCAST: return "bitcast";
+  case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+  case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+
+  case ISD::CONVERT_RNDSAT: {
+    switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+    default: llvm_unreachable("Unknown cvt code!");
+    case ISD::CVT_FF: return "cvt_ff";
+    case ISD::CVT_FS: return "cvt_fs";
+    case ISD::CVT_FU: return "cvt_fu";
+    case ISD::CVT_SF: return "cvt_sf";
+    case ISD::CVT_UF: return "cvt_uf";
+    case ISD::CVT_SS: return "cvt_ss";
+    case ISD::CVT_SU: return "cvt_su";
+    case ISD::CVT_US: return "cvt_us";
+    case ISD::CVT_UU: return "cvt_uu";
+    }
+  }
+
+  // Control flow instructions
+  case ISD::BR: return "br";
+  case ISD::BRIND: return "brind";
+  case ISD::BR_JT: return "br_jt";
+  case ISD::BRCOND: return "brcond";
+  case ISD::BR_CC: return "br_cc";
+  case ISD::CALLSEQ_START: return "callseq_start";
+  case ISD::CALLSEQ_END: return "callseq_end";
+
+  // Other operators
+  case ISD::LOAD: return "load";
+  case ISD::STORE: return "store";
+  case ISD::VAARG: return "vaarg";
+  case ISD::VACOPY: return "vacopy";
+  case ISD::VAEND: return "vaend";
+  case ISD::VASTART: return "vastart";
+  case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+  case ISD::EXTRACT_ELEMENT: return "extract_element";
+  case ISD::BUILD_PAIR: return "build_pair";
+  case ISD::STACKSAVE: return "stacksave";
+  case ISD::STACKRESTORE: return "stackrestore";
+  case ISD::TRAP: return "trap";
+  case ISD::DEBUGTRAP: return "debugtrap";
+  case ISD::LIFETIME_START: return "lifetime.start";
+  case ISD::LIFETIME_END: return "lifetime.end";
+
+  // Bit manipulation
+  case ISD::BSWAP: return "bswap";
+  case ISD::CTPOP: return "ctpop";
+  case ISD::CTTZ: return "cttz";
+  case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+  case ISD::CTLZ: return "ctlz";
+  case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+
+  // Trampolines
+  case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+  case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+  case ISD::CONDCODE:
+    switch (cast<CondCodeSDNode>(this)->get()) {
+    default: llvm_unreachable("Unknown setcc condition!");
+    case ISD::SETOEQ: return "setoeq";
+    case ISD::SETOGT: return "setogt";
+    case ISD::SETOGE: return "setoge";
+    case ISD::SETOLT: return "setolt";
+    case ISD::SETOLE: return "setole";
+    case ISD::SETONE: return "setone";
+
+    case ISD::SETO: return "seto";
+    case ISD::SETUO: return "setuo";
+    case ISD::SETUEQ: return "setueq";
+    case ISD::SETUGT: return "setugt";
+    case ISD::SETUGE: return "setuge";
+    case ISD::SETULT: return "setult";
+    case ISD::SETULE: return "setule";
+    case ISD::SETUNE: return "setune";
+
+    case ISD::SETEQ: return "seteq";
+    case ISD::SETGT: return "setgt";
+    case ISD::SETGE: return "setge";
+    case ISD::SETLT: return "setlt";
+    case ISD::SETLE: return "setle";
+    case ISD::SETNE: return "setne";
+
+    case ISD::SETTRUE: return "settrue";
+    case ISD::SETTRUE2: return "settrue2";
+    case ISD::SETFALSE:
return "setfalse"; + case ISD::SETFALSE2: return "setfalse2"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: return ""; + case ISD::PRE_INC: return "<pre-inc>"; + case ISD::PRE_DEC: return "<pre-dec>"; + case ISD::POST_INC: return "<post-inc>"; + case ISD::POST_DEC: return "<post-dec>"; + } +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(dbgs(), G); + dbgs() << '\n'; +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (const void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getEVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = GADN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + if (unsigned int TF = JTDN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = CP->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) { + OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">"; + if (unsigned TF = TI->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const 
void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + OS << "<" << *LD->getMemOperand(); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + OS << "<" << *ST->getMemOperand(); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << "<" << *M->getMemOperand() << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(this)) { + int64_t offset = BA->getOffset(); + OS << "<"; + WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + OS << ", "; + WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + OS << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } + + if (G) + if (unsigned Order = G->GetOrdering(this)) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + + DebugLoc dl = getDebugLoc(); + if (G && !dl.isUnknown()) { + DIScope + Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); + OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. 
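//===-- Sketch: formatting the "dbg:" location suffix (illustrative) -----===//
// The lines below append a source location as " dbg:<file>:<line>[:<col>]",
// omitting the column when it is zero and falling back to "<unknown>" when
// the scope cannot be verified. A minimal standalone model of that
// formatting, assuming hypothetical inputs (formatDebugLoc is not an LLVM
// API; it is invented here for illustration):
#if 0 // illustrative sketch only; not part of the imported source
#include <sstream>
#include <string>

static std::string formatDebugLoc(const std::string &File,
                                  unsigned Line, unsigned Col) {
  std::ostringstream OS;
  OS << " dbg:" << (File.empty() ? "<unknown>" : File) << ':' << Line;
  if (Col != 0)                  // a zero column means "no column info"
    OS << ':' << Col;
  return OS.str();               // e.g. " dbg:foo.c:42:7"
}
#endif
//===----------------------------------------------------------------------===//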
+ if (Scope.Verify()) + OS << Scope.getFilename(); + else + OS << "<unknown>"; + OS << ':' << dl.getLine(); + if (dl.getCol() != 0) + OS << ':' << dl.getCol(); + } +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + + dbgs() << '\n'; + dbgs().indent(indent); + N->dump(G); +} + +void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + dbgs() << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, return now. + return; + + // Dump the current SDNode, but don't end the line yet. + OS.indent(indent); + N->printr(OS, G); + + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + + if (i) OS << ","; + OS << " "; + + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode. + OS << (const void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + + OS << "\n"; + + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, 0, once); +} + +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, G, once); +} + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) { + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. + if (N->getOperand(i).getValueType() == MVT::Other) + continue; + OS << '\n'; + printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 10); +} + +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. 
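//===-- Sketch: depth-limited recursive dumping (illustrative) -----------===//
// printrWithDepthHelper() above bounds its recursion with an explicit depth
// budget so that very deep DAGs cannot produce runaway output; dumprFull()
// below simply calls dumprWithDepth() with a fixed budget of 10. The same
// pattern on a toy binary tree (ToyNode and dumpToDepth are invented names):
#if 0 // illustrative sketch only; not part of the imported source
#include <cstdio>

struct ToyNode {
  int Value;
  ToyNode *Left, *Right;
};

static void dumpToDepth(const ToyNode *N, unsigned Depth, int Indent) {
  if (!N || Depth == 0)          // stop on null children or an empty budget
    return;
  std::printf("%*s%d\n", Indent, "", N->Value);
  dumpToDepth(N->Left, Depth - 1, Indent + 2);
  dumpToDepth(N->Right, Depth - 1, Indent + 2);
}
#endif
//===----------------------------------------------------------------------===//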
+ dumprWithDepth(G, 10); +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp new file mode 100644 index 0000000..eeea9e4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -0,0 +1,3019 @@ +//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAGISel class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "ScheduleDAGSDNodes.h" +#include "SelectionDAGBuilder.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); +STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); +STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); +STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); + +#ifndef NDEBUG +static cl::opt<bool> +EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, + cl::desc("Enable extra verbose messages in the \"fast\" " + "instruction selector")); + // Terminators +STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); 
+STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
+STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
+STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
+STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
+STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
+STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
+
+  // Standard binary operators...
+STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
+STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
+STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
+STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
+STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
+STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
+STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
+STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
+STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
+STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
+STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
+STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
+
+  // Logical operators...
+STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
+STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
+STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
+
+  // Memory instructions...
+STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
+STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
+STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
+STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
+STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRMW");
+STATISTIC(NumFastIselFailFence,"Fast isel fails on Fence");
+STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
+
+  // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
+STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
+STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
+STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
+STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
+STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
+STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
+STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
+STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
+STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
+STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
+STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
+
+  // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp"); +STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp"); +STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI"); +STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select"); +STATISTIC(NumFastIselFailCall,"Fast isel fails on Call"); +STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl"); +STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr"); +STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr"); +STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg"); +STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement"); +STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement"); +STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); +STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); +STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); +STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); +#endif + +static cl::opt<bool> +EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, + cl::desc("Enable verbose messages in the \"fast\" " + "instruction selector")); +static cl::opt<bool> +EnableFastISelAbort("fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower an instruction")); +static cl::opt<bool> +EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower a formal argument")); + +static cl::opt<bool> +UseMBPI("use-mbpi", + cl::desc("use Machine Branch Probability Info"), + cl::init(true), cl::Hidden); + +#ifndef NDEBUG +static cl::opt<bool> +ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the first " + "dag combine pass")); +static cl::opt<bool> +ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize types")); +static cl::opt<bool> +ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); +static cl::opt<bool> +ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the second " + "dag combine pass")); +static cl::opt<bool> +ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post legalize types" + " dag combine pass")); +static cl::opt<bool> +ViewISelDAGs("view-isel-dags", cl::Hidden, + cl::desc("Pop up a window to show isel dags as they are selected")); +static cl::opt<bool> +ViewSchedDAGs("view-sched-dags", cl::Hidden, + cl::desc("Pop up a window to show sched dags as they are processed")); +static cl::opt<bool> +ViewSUnitDAGs("view-sunit-dags", cl::Hidden, + cl::desc("Pop up a window to show SUnit dags after they are processed")); +#else +static const bool ViewDAGCombine1 = false, + ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, + ViewDAGCombineLT = false, + ViewISelDAGs = false, ViewSchedDAGs = false, + ViewSUnitDAGs = false; +#endif + +//===---------------------------------------------------------------------===// +/// +/// RegisterScheduler class - Track the registration of instruction schedulers. 
+/// +//===---------------------------------------------------------------------===// +MachinePassRegistry RegisterScheduler::Registry; + +//===---------------------------------------------------------------------===// +/// +/// ISHeuristic command line option for instruction schedulers. +/// +//===---------------------------------------------------------------------===// +static cl::opt<RegisterScheduler::FunctionPassCtor, false, + RegisterPassParser<RegisterScheduler> > +ISHeuristic("pre-RA-sched", + cl::init(&createDefaultScheduler), + cl::desc("Instruction schedulers available (before register" + " allocation):")); + +static RegisterScheduler +defaultListDAGScheduler("default", "Best scheduler for the target", + createDefaultScheduler); + +namespace llvm { + //===--------------------------------------------------------------------===// + /// createDefaultScheduler - This creates an instruction scheduler appropriate + /// for the target. + ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetLowering &TLI = IS->getTargetLowering(); + const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>(); + + if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() || + TLI.getSchedulingPreference() == Sched::Source) + return createSourceListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::RegPressure) + return createBURRListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::Hybrid) + return createHybridListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::VLIW) + return createVLIWDAGScheduler(IS, OptLevel); + assert(TLI.getSchedulingPreference() == Sched::ILP && + "Unknown sched type!"); + return createILPListDAGScheduler(IS, OptLevel); + } +} + +// EmitInstrWithCustomInserter - This method should be implemented by targets +// that mark instructions with the 'usesCustomInserter' flag. These +// instructions are special in various ways, which require special support to +// insert. The specified MachineInstr is created but not inserted into any +// basic blocks, and this method is called to expand it into a sequence of +// instructions, potentially also creating new basic blocks and control flow. +// When new basic blocks are inserted and the edges from MBB to its successors +// are modified, the method should insert pairs of <OldSucc, NewSucc> into the +// DenseMap. 
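//===-- Sketch: a target's usesCustomInserter hook (illustrative) --------===//
// The default definition below only reports an error: any target that marks
// an instruction with 'usesCustomInserter' is expected to override it. A
// hedged sketch of what such an override typically looks like; the names
// MyTargetLowering, MyTarget::SELECT_PSEUDO and EmitSelectPseudo are all
// invented for illustration and belong to no real target:
#if 0 // illustrative sketch only; not part of the imported source
MachineBasicBlock *
MyTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                              MachineBasicBlock *MBB) const {
  switch (MI->getOpcode()) {
  default:
    llvm_unreachable("unexpected instruction with usesCustomInserter");
  case MyTarget::SELECT_PSEUDO:
    // Expand the pseudo into real control flow (e.g. a diamond of blocks)
    // and return the block where subsequent insertion should continue.
    return EmitSelectPseudo(MI, MBB); // hypothetical helper
  }
}
#endif
//===----------------------------------------------------------------------===//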
+MachineBasicBlock * +TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { +#ifndef NDEBUG + dbgs() << "If a target marks an instruction with " + "'usesCustomInserter', it must implement " + "TargetLowering::EmitInstrWithCustomInserter!"; +#endif + llvm_unreachable(0); +} + +void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const { + assert(!MI->hasPostISelHook() && + "If a target marks an instruction with 'hasPostISelHook', " + "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); +} + +//===----------------------------------------------------------------------===// +// SelectionDAGISel code +//===----------------------------------------------------------------------===// + +SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, + CodeGenOpt::Level OL) : + MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), + FuncInfo(new FunctionLoweringInfo(TLI)), + CurDAG(new SelectionDAG(tm, OL)), + SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), + GFI(), + OptLevel(OL), + DAGSize(0) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); + initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); + } + +SelectionDAGISel::~SelectionDAGISel() { + delete SDB; + delete CurDAG; + delete FuncInfo; +} + +void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + AU.addRequired<GCModuleInfo>(); + AU.addPreserved<GCModuleInfo>(); + AU.addRequired<TargetLibraryInfo>(); + if (UseMBPI && OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that +/// may trap on it. In this case we have to split the edge so that the path +/// through the predecessor block that doesn't go to the phi block doesn't +/// execute the possibly trapping instruction. +/// +/// This is required for correctness, so it must be done at -O0. +/// +static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { + // Loop for blocks with phi nodes. + for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + PHINode *PN = dyn_cast<PHINode>(BB->begin()); + if (PN == 0) continue; + + ReprocessBlock: + // For each block with a PHI node, check to see if any of the input values + // are potentially trapping constant expressions. Constant expressions are + // the only potentially trapping value that can occur as the argument to a + // PHI. + for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); + if (CE == 0 || !CE->canTrap()) continue; + + // The only case we have to worry about is when the edge is critical. + // Since this block has a PHI Node, we assume it has multiple input + // edges: check to see if the pred has multiple successors. + BasicBlock *Pred = PN->getIncomingBlock(i); + if (Pred->getTerminator()->getNumSuccessors() == 1) + continue; + + // Okay, we have to split this edge. 
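//===-- Sketch: what makes an edge "critical" (illustrative) -------------===//
// The SplitCriticalEdge() call below fires when the predecessor has more
// than one successor; since the current block has a PHI it already has more
// than one predecessor, so the edge Pred->BB is critical and code cannot be
// placed on that path without a new intermediate block. A toy model of the
// test (ToyBlock and isCriticalEdge are invented names):
#if 0 // illustrative sketch only; not part of the imported source
#include <cstddef>

struct ToyBlock {
  std::size_t NumSuccessors;
  std::size_t NumPredecessors;
};

static bool isCriticalEdge(const ToyBlock &Pred, const ToyBlock &Succ) {
  // Critical: code placed "on the edge" would otherwise execute on other
  // paths out of Pred or into Succ.
  return Pred.NumSuccessors > 1 && Succ.NumPredecessors > 1;
}
#endif
//===----------------------------------------------------------------------===//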
+      SplitCriticalEdge(Pred->getTerminator(),
+                        GetSuccessorNumber(Pred, BB), SDISel, true);
+      goto ReprocessBlock;
+    }
+  }
+}
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+  // Do some sanity-checking on the command-line options.
+  assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
+         "-fast-isel-verbose requires -fast-isel");
+  assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
+         "-fast-isel-abort requires -fast-isel");
+
+  const Function &Fn = *mf.getFunction();
+  const TargetInstrInfo &TII = *TM.getInstrInfo();
+  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+  MF = &mf;
+  RegInfo = &MF->getRegInfo();
+  AA = &getAnalysis<AliasAnalysis>();
+  LibInfo = &getAnalysis<TargetLibraryInfo>();
+  TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+  GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+
+  TargetSubtargetInfo &ST =
+    const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
+  ST.resetSubtargetFeatures(MF);
+  TM.resetTargetOptions(MF);
+
+  DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+  SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
+  CurDAG->init(*MF, TTI);
+  FuncInfo->set(Fn, *MF);
+
+  if (UseMBPI && OptLevel != CodeGenOpt::None)
+    FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+  else
+    FuncInfo->BPI = 0;
+
+  SDB->init(GFI, *AA, LibInfo);
+
+  MF->setHasMSInlineAsm(false);
+  SelectAllBasicBlocks(Fn);
+
+  // If the first basic block in the function has live ins that need to be
+  // copied into vregs, emit the copies into the top of the block before
+  // emitting the code for the block.
+  MachineBasicBlock *EntryMBB = MF->begin();
+  RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+
+  DenseMap<unsigned, unsigned> LiveInMap;
+  if (!FuncInfo->ArgDbgValues.empty())
+    for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+           E = RegInfo->livein_end(); LI != E; ++LI)
+      if (LI->second)
+        LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
+  // Insert DBG_VALUE instructions for function arguments to the entry block.
+  for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
+    MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
+    unsigned Reg = MI->getOperand(0).getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      EntryMBB->insert(EntryMBB->begin(), MI);
+    else {
+      MachineInstr *Def = RegInfo->getVRegDef(Reg);
+      MachineBasicBlock::iterator InsertPos = Def;
+      // FIXME: VR def may not be in entry block.
+      Def->getParent()->insert(llvm::next(InsertPos), MI);
+    }
+
+    // If Reg is live-in then update debug info to track its copy in a vreg.
+    DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+    if (LDI != LiveInMap.end()) {
+      MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+      MachineBasicBlock::iterator InsertPos = Def;
+      const MDNode *Variable =
+        MI->getOperand(MI->getNumOperands()-1).getMetadata();
+      unsigned Offset = MI->getOperand(1).getImm();
+      // Def is never a terminator here, so it is ok to increment InsertPos.
+      BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+              TII.get(TargetOpcode::DBG_VALUE))
+        .addReg(LDI->second, RegState::Debug)
+        .addImm(Offset).addMetadata(Variable);
+
+      // If this vreg is directly copied into an exported register then
+      // the COPY instruction also needs a DBG_VALUE, if it is the only
+      // user of LDI->second.
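//===-- Sketch: scanning for a unique non-debug COPY use (illustrative) --===//
// The loop below accepts the vreg only if it has exactly one non-debug use,
// and that use is a COPY in the entry block; any second candidate clears the
// result and bails out. The same "unique match or null" scan over a plain
// container (ToyUse and findUniqueCopyUse are invented names):
#if 0 // illustrative sketch only; not part of the imported source
#include <cstddef>
#include <vector>

struct ToyUse { bool IsDebug; bool IsEntryBlockCopy; };

static const ToyUse *findUniqueCopyUse(const std::vector<ToyUse> &Uses) {
  const ToyUse *Found = 0;
  for (std::size_t i = 0, e = Uses.size(); i != e; ++i) {
    if (Uses[i].IsDebug)
      continue;                       // debug uses do not count
    if (Uses[i].IsEntryBlockCopy && !Found) {
      Found = &Uses[i];               // first acceptable use
      continue;
    }
    return 0;                         // second use, or wrong kind: give up
  }
  return Found;
}
#endif
//===----------------------------------------------------------------------===//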
+ MachineInstr *CopyUseMI = NULL; + for (MachineRegisterInfo::use_iterator + UI = RegInfo->use_begin(LDI->second); + MachineInstr *UseMI = UI.skipInstruction();) { + if (UseMI->isDebugValue()) continue; + if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { + CopyUseMI = UseMI; continue; + } + // Otherwise this is another use or second copy use. + CopyUseMI = NULL; break; + } + if (CopyUseMI) { + MachineInstr *NewMI = + BuildMI(*MF, CopyUseMI->getDebugLoc(), + TII.get(TargetOpcode::DBG_VALUE)) + .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) + .addImm(Offset).addMetadata(Variable); + MachineBasicBlock::iterator Pos = CopyUseMI; + EntryMBB->insertAfter(Pos, NewMI); + } + } + } + + // Determine if there are any calls in this machine function. + MachineFrameInfo *MFI = MF->getFrameInfo(); + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { + + if (MFI->hasCalls() && MF->hasMSInlineAsm()) + break; + + const MachineBasicBlock *MBB = I; + for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); + II != IE; ++II) { + const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); + if ((MCID.isCall() && !MCID.isReturn()) || + II->isStackAligningInlineAsm()) { + MFI->setHasCalls(true); + } + if (II->isMSInlineAsm()) { + MF->setHasMSInlineAsm(true); + } + } + } + + // Determine if there is a call to setjmp in the machine function. + MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); + + // Replace forward-declared registers with the registers containing + // the desired value. + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (DenseMap<unsigned, unsigned>::iterator + I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); + I != E; ++I) { + unsigned From = I->first; + unsigned To = I->second; + // If To is also scheduled to be replaced, find what its ultimate + // replacement is. + for (;;) { + DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To); + if (J == E) break; + To = J->second; + } + // Replace it. + MRI.replaceRegWith(From, To); + } + + // Freeze the set of reserved registers now that MachineFrameInfo has been + // set up. All the information required by getReservedRegs() should be + // available now. + MRI.freezeReservedRegs(*MF); + + // Release function-specific state. SDB and CurDAG are already cleared + // at this point. + FuncInfo->clear(); + + return true; +} + +void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { + // Lower all of the non-terminator instructions. If a call is emitted + // as a tail call, cease emitting nodes for this block. Terminators + // are handled below. + for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) + SDB->visit(*I); + + // Make sure the root of the DAG is up-to-date. + CurDAG->setRoot(SDB->getControlRoot()); + HadTailCall = SDB->HasTailCall; + SDB->clear(); + + // Final step, emit the lowered DAG as machine code. + CodeGenAndEmitDAG(); +} + +void SelectionDAGISel::ComputeLiveOutVRegInfo() { + SmallPtrSet<SDNode*, 128> VisitedNodes; + SmallVector<SDNode*, 128> Worklist; + + Worklist.push_back(CurDAG->getRoot().getNode()); + + APInt KnownZero; + APInt KnownOne; + + do { + SDNode *N = Worklist.pop_back_val(); + + // If we've already seen this node, ignore it. + if (!VisitedNodes.insert(N)) + continue; + + // Otherwise, add all chain operands to the worklist. 
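//===-- Sketch: the worklist-plus-visited-set traversal (illustrative) ---===//
// The surrounding ComputeLiveOutVRegInfo() walks the DAG with the classic
// worklist idiom: pop a node, skip it if already visited, then push its
// chain neighbors (the loop below). The same shape on a plain adjacency
// list (visitAll is an invented name):
#if 0 // illustrative sketch only; not part of the imported source
#include <cstddef>
#include <set>
#include <vector>

static void visitAll(const std::vector<std::vector<int> > &Adj, int Root) {
  std::set<int> Visited;
  std::vector<int> Worklist;
  Worklist.push_back(Root);
  do {
    int N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)    // mirrors !VisitedNodes.insert(N)
      continue;
    for (std::size_t i = 0, e = Adj[N].size(); i != e; ++i)
      Worklist.push_back(Adj[N][i]);  // mirrors pushing chain operands
  } while (!Worklist.empty());
}
#endif
//===----------------------------------------------------------------------===//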
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + Worklist.push_back(N->getOperand(i).getNode()); + + // If this is a CopyToReg with a vreg dest, process it. + if (N->getOpcode() != ISD::CopyToReg) + continue; + + unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // Ignore non-scalar or non-integer values. + SDValue Src = N->getOperand(2); + EVT SrcVT = Src.getValueType(); + if (!SrcVT.isInteger() || SrcVT.isVector()) + continue; + + unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); + CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne); + FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); + } while (!Worklist.empty()); +} + +void SelectionDAGISel::CodeGenAndEmitDAG() { + std::string GroupName; + if (TimePassesIsEnabled) + GroupName = "Instruction Selection and Scheduling"; + std::string BlockName; + int BlockNumber = -1; + (void)BlockNumber; +#ifdef NDEBUG + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || + ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + ViewSUnitDAGs) +#endif + { + BlockNumber = FuncInfo->MBB->getNumber(); + BlockName = MF->getName().str() + ":" + + FuncInfo->MBB->getBasicBlock()->getName().str(); + } + DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); + + // Run the DAG combiner in pre-legalize mode. + { + NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); + CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + // Second step, hack on the DAG until it only uses operations and types that + // the target supports. + if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + + BlockName); + + bool Changed; + { + NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled); + Changed = CurDAG->LegalizeTypes(); + } + + DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + if (Changed) { + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lt input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + { + NamedRegionTimer T("DAG Combining after legalize types", GroupName, + TimePassesIsEnabled); + CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + } + + { + NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled); + Changed = CurDAG->LegalizeVectors(); + } + + if (Changed) { + { + NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); + CurDAG->LegalizeTypes(); + } + + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lv input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. 
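//===-- Sketch: RAII phase timing with scoped timers (illustrative) ------===//
// Each compilation phase in CodeGenAndEmitDAG() runs inside its own braced
// scope that constructs a NamedRegionTimer: the constructor starts the clock
// and the destructor stops it when the scope closes. A minimal standalone
// timer with the same shape (ScopedTimer is an invented name; the real
// NamedRegionTimer additionally groups regions and is gated on
// TimePassesIsEnabled):
#if 0 // illustrative sketch only; not part of the imported source
#include <cstdio>
#include <ctime>

class ScopedTimer {
  const char *Name;
  std::clock_t Start;
public:
  explicit ScopedTimer(const char *N) : Name(N), Start(std::clock()) {}
  ~ScopedTimer() {
    std::printf("%s: %ld ticks\n", Name, (long)(std::clock() - Start));
  }
};

// Usage mirrors the pattern in this function:
//   { ScopedTimer T("DAG Combining 2"); /* run the combine */ }
#endif
//===----------------------------------------------------------------------===//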
+ { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, + TimePassesIsEnabled); + CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" + << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + } + + if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); + + { + NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); + CurDAG->Legalize(); + } + + DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); + + // Run the DAG combiner in post-legalize mode. + { + NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); + CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + if (OptLevel != CodeGenOpt::None) + ComputeLiveOutVRegInfo(); + + if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); + + // Third, instruction select all of the operations to machine code, adding the + // code to the MachineBasicBlock. + { + NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled); + DoInstructionSelection(); + } + + DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); + + // Schedule machine code. + ScheduleDAGSDNodes *Scheduler = CreateScheduler(); + { + NamedRegionTimer T("Instruction Scheduling", GroupName, + TimePassesIsEnabled); + Scheduler->Run(CurDAG, FuncInfo->MBB); + } + + if (ViewSUnitDAGs) Scheduler->viewGraph(); + + // Emit machine code to BB. This can change 'BB' to the last block being + // inserted into. + MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB; + { + NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); + + // FuncInfo->InsertPt is passed by reference and set to the end of the + // scheduled instructions. + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt); + } + + // If the block was split, make sure we update any references that are used to + // update PHI nodes later on. + if (FirstMBB != LastMBB) + SDB->UpdateSplitBlock(FirstMBB, LastMBB); + + // Free the scheduler state. + { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, + TimePassesIsEnabled); + delete Scheduler; + } + + // Free the SelectionDAG state, now that we're finished with it. + CurDAG->clear(); +} + +namespace { +/// ISelUpdater - helper class to handle updates of the instruction selection +/// graph. +class ISelUpdater : public SelectionDAG::DAGUpdateListener { + SelectionDAG::allnodes_iterator &ISelPosition; +public: + ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp) + : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {} + + /// NodeDeleted - Handle nodes deleted from the graph. If the node being + /// deleted is the current ISelPosition node, update ISelPosition. 
+  ///
+  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+    if (ISelPosition == SelectionDAG::allnodes_iterator(N))
+      ++ISelPosition;
+  }
+};
+} // end anonymous namespace
+
+void SelectionDAGISel::DoInstructionSelection() {
+  DEBUG(errs() << "===== Instruction selection begins: BB#"
+        << FuncInfo->MBB->getNumber()
+        << " '" << FuncInfo->MBB->getName() << "'\n");
+
+  PreprocessISelDAG();
+
+  // Select target instructions for the DAG.
+  {
+    // Number all nodes with a topological order and set DAGSize.
+    DAGSize = CurDAG->AssignTopologicalOrder();
+
+    // Create a dummy node (which is not added to allnodes), that adds
+    // a reference to the root node, preventing it from being deleted,
+    // and tracking any changes of the root.
+    HandleSDNode Dummy(CurDAG->getRoot());
+    SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode());
+    ++ISelPosition;
+
+    // Make sure that ISelPosition gets properly updated when nodes are deleted
+    // in calls made from this function.
+    ISelUpdater ISU(*CurDAG, ISelPosition);
+
+    // The AllNodes list is now topological-sorted. Visit the
+    // nodes by starting at the end of the list (the root of the
+    // graph) and proceeding back toward the beginning (the entry
+    // node).
+    while (ISelPosition != CurDAG->allnodes_begin()) {
+      SDNode *Node = --ISelPosition;
+      // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+      // but there are currently some corner cases that it misses. Also, this
+      // makes it theoretically possible to disable the DAGCombiner.
+      if (Node->use_empty())
+        continue;
+
+      SDNode *ResNode = Select(Node);
+
+      // FIXME: This is pretty gross. 'Select' should be changed to not return
+      // anything at all and this code should be nuked with a tactical strike.
+
+      // If node should not be replaced, continue with the next one.
+      if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
+        continue;
+      // Replace node.
+      if (ResNode) {
+        // Propagate ordering
+        CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node));
+
+        ReplaceUses(Node, ResNode);
+      }
+
+      // If after the replacement this node is not used any more,
+      // remove this dead node.
+      if (Node->use_empty()) // Don't delete EntryToken, etc.
+        CurDAG->RemoveDeadNode(Node);
+    }
+
+    CurDAG->setRoot(Dummy.getValue());
+  }
+
+  DEBUG(errs() << "===== Instruction selection ends:\n");
+
+  PostprocessISelDAG();
+}
+
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+void SelectionDAGISel::PrepareEHLandingPad() {
+  MachineBasicBlock *MBB = FuncInfo->MBB;
+
+  // Add a label to mark the beginning of the landing pad. Deletion of the
+  // landing pad can thus be detected via the MachineModuleInfo.
+  MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
+
+  // Assign the call site to the landing pad's begin label.
+  MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+
+  const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+  BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+    .addSym(Label);
+
+  // Mark exception register as live in.
+  unsigned Reg = TLI.getExceptionPointerRegister();
+  if (Reg) MBB->addLiveIn(Reg);
+
+  // Mark exception selector register as live in.
+  Reg = TLI.getExceptionSelectorRegister();
+  if (Reg) MBB->addLiveIn(Reg);
+}
+
+/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
+/// load into the specified FoldInst.
Note that we could have a sequence where +/// multiple LLVM IR instructions are folded into the same machineinstr. For +/// example we could have: +/// A: x = load i32 *P +/// B: y = icmp A, 42 +/// C: br y, ... +/// +/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and +/// any other folded instructions) because it is between A and C. +/// +/// If we succeed in folding the load into the operation, return true. +/// +bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + const Instruction *FoldInst, + FastISel *FastIS) { + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + + // If we didn't find the fold instruction, then we failed to collapse the + // sequence. + if (TheUser != FoldInst) + return false; + + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) return false; + + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. + unsigned LoadReg = FastIS->getRegForValue(LI); + if (LoadReg == 0) + return false; + + // Check to see what the uses of this vreg are. If it has no uses, or more + // than one use (at the machine instr level) then we can't fold it. + MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg); + if (RI == RegInfo->reg_end()) + return false; + + // See if there is exactly one use of the vreg. If there are multiple uses, + // then the instruction got lowered to multiple machine instructions or the + // use of the loaded value ended up being multiple operands of the result, in + // either case, we can't fold this. + MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI; + if (PostRI != RegInfo->reg_end()) + return false; + + assert(RI.getOperand().isUse() && + "The only use of the vreg must be a use, we haven't emitted the def!"); + + MachineInstr *User = &*RI; + + // Set the insertion point properly. Folding the load can cause generation of + // other random instructions (like sign extends) for addressing modes, make + // sure they get inserted in a logical place before the new instruction. + FuncInfo->InsertPt = User; + FuncInfo->MBB = User->getParent(); + + // Ask the target to try folding the load. + return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); +} + +/// isFoldedOrDeadInstruction - Return true if the specified instruction is +/// side-effect free and is either dead or folded into a generated instruction. +/// Return false if it needs to be emitted. +static bool isFoldedOrDeadInstruction(const Instruction *I, + FunctionLoweringInfo *FuncInfo) { + return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. + !isa<TerminatorInst>(I) && // Terminators aren't folded. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. 
+ !isa<LandingPadInst>(I) && // Landingpad instructions aren't folded. + !FuncInfo->isExportedInst(I); // Exported instrs must be computed. +} + +#ifndef NDEBUG +// Collect per Instruction statistics for fast-isel misses. Only those +// instructions that cause the bail are accounted for. It does not account for +// instructions higher in the block. Thus, summing the per instructions stats +// will not add up to what is reported by NumFastIselFailures. +static void collectFailStats(const Instruction *I) { + switch (I->getOpcode()) { + default: assert (0 && "<Invalid operator> "); + + // Terminators + case Instruction::Ret: NumFastIselFailRet++; return; + case Instruction::Br: NumFastIselFailBr++; return; + case Instruction::Switch: NumFastIselFailSwitch++; return; + case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return; + case Instruction::Invoke: NumFastIselFailInvoke++; return; + case Instruction::Resume: NumFastIselFailResume++; return; + case Instruction::Unreachable: NumFastIselFailUnreachable++; return; + + // Standard binary operators... + case Instruction::Add: NumFastIselFailAdd++; return; + case Instruction::FAdd: NumFastIselFailFAdd++; return; + case Instruction::Sub: NumFastIselFailSub++; return; + case Instruction::FSub: NumFastIselFailFSub++; return; + case Instruction::Mul: NumFastIselFailMul++; return; + case Instruction::FMul: NumFastIselFailFMul++; return; + case Instruction::UDiv: NumFastIselFailUDiv++; return; + case Instruction::SDiv: NumFastIselFailSDiv++; return; + case Instruction::FDiv: NumFastIselFailFDiv++; return; + case Instruction::URem: NumFastIselFailURem++; return; + case Instruction::SRem: NumFastIselFailSRem++; return; + case Instruction::FRem: NumFastIselFailFRem++; return; + + // Logical operators... + case Instruction::And: NumFastIselFailAnd++; return; + case Instruction::Or: NumFastIselFailOr++; return; + case Instruction::Xor: NumFastIselFailXor++; return; + + // Memory instructions... + case Instruction::Alloca: NumFastIselFailAlloca++; return; + case Instruction::Load: NumFastIselFailLoad++; return; + case Instruction::Store: NumFastIselFailStore++; return; + case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return; + case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return; + case Instruction::Fence: NumFastIselFailFence++; return; + case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return; + + // Convert instructions... + case Instruction::Trunc: NumFastIselFailTrunc++; return; + case Instruction::ZExt: NumFastIselFailZExt++; return; + case Instruction::SExt: NumFastIselFailSExt++; return; + case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return; + case Instruction::FPExt: NumFastIselFailFPExt++; return; + case Instruction::FPToUI: NumFastIselFailFPToUI++; return; + case Instruction::FPToSI: NumFastIselFailFPToSI++; return; + case Instruction::UIToFP: NumFastIselFailUIToFP++; return; + case Instruction::SIToFP: NumFastIselFailSIToFP++; return; + case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; + case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return; + case Instruction::BitCast: NumFastIselFailBitCast++; return; + + // Other instructions... 
+ case Instruction::ICmp: NumFastIselFailICmp++; return; + case Instruction::FCmp: NumFastIselFailFCmp++; return; + case Instruction::PHI: NumFastIselFailPHI++; return; + case Instruction::Select: NumFastIselFailSelect++; return; + case Instruction::Call: NumFastIselFailCall++; return; + case Instruction::Shl: NumFastIselFailShl++; return; + case Instruction::LShr: NumFastIselFailLShr++; return; + case Instruction::AShr: NumFastIselFailAShr++; return; + case Instruction::VAArg: NumFastIselFailVAArg++; return; + case Instruction::ExtractElement: NumFastIselFailExtractElement++; return; + case Instruction::InsertElement: NumFastIselFailInsertElement++; return; + case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return; + case Instruction::ExtractValue: NumFastIselFailExtractValue++; return; + case Instruction::InsertValue: NumFastIselFailInsertValue++; return; + case Instruction::LandingPad: NumFastIselFailLandingPad++; return; + } +} +#endif + +void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { + // Initialize the Fast-ISel state, if needed. + FastISel *FastIS = 0; + if (TM.Options.EnableFastISel) + FastIS = TLI.createFastISel(*FuncInfo, LibInfo); + + // Iterate over all basic blocks in the function. + ReversePostOrderTraversal<const Function*> RPOT(&Fn); + for (ReversePostOrderTraversal<const Function*>::rpo_iterator + I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { + const BasicBlock *LLVMBB = *I; + + if (OptLevel != CodeGenOpt::None) { + bool AllPredsVisited = true; + for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); + PI != PE; ++PI) { + if (!FuncInfo->VisitedBBs.count(*PI)) { + AllPredsVisited = false; + break; + } + } + + if (AllPredsVisited) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) + FuncInfo->ComputePHILiveOutRegInfo(PN); + } else { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) + FuncInfo->InvalidatePHILiveOutRegInfo(PN); + } + + FuncInfo->VisitedBBs.insert(LLVMBB); + } + + BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); + BasicBlock::const_iterator const End = LLVMBB->end(); + BasicBlock::const_iterator BI = End; + + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); + + // Setup an EH landing-pad block. + if (FuncInfo->MBB->isLandingPad()) + PrepareEHLandingPad(); + + // Before doing SelectionDAG ISel, see if FastISel has been requested. + if (FastIS) { + FastIS->startNewBlock(); + + // Emit code for any incoming arguments. This must happen before + // beginning FastISel on the entry block. + if (LLVMBB == &Fn.getEntryBlock()) { + // Lower any arguments needed in this block if this is the entry block. + if (!FastIS->LowerArguments()) { + // Fast isel failed to lower these arguments + if (EnableFastISelAbortArgs) + llvm_unreachable("FastISel didn't lower all arguments"); + + // Use SelectionDAG argument lowering + LowerArguments(Fn); + CurDAG->setRoot(SDB->getControlRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // If we inserted any instructions at the beginning, make a note of + // where they are, so we can be sure to emit subsequent instructions + // after them. + if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) + FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt)); + else + FastIS->setLastLocalValue(0); + } + + unsigned NumFastIselRemaining = std::distance(Begin, End); + // Do FastISel on as many instructions as possible. 
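//===-- Sketch: the bottom-up FastISel scan (illustrative) ---------------===//
// The selection loop below iterates the block in reverse: BI starts at End
// and is decremented, and llvm::prior(BI) names the instruction currently
// under the cursor. The same reverse scan over a std::vector, assuming toy
// element and function names:
#if 0 // illustrative sketch only; not part of the imported source
#include <vector>

static int countBackwards(const std::vector<int> &Block) {
  int Visited = 0;
  std::vector<int>::const_iterator Begin = Block.begin(), BI = Block.end();
  for (; BI != Begin; --BI) {
    std::vector<int>::const_iterator Inst = BI - 1; // like llvm::prior(BI)
    (void)*Inst;                                    // "select" *Inst here
    ++Visited;
  }
  return Visited;
}
#endif
//===----------------------------------------------------------------------===//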
+ for (; BI != Begin; --BI) { + const Instruction *Inst = llvm::prior(BI); + + // If we no longer require this instruction, skip it. + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { + --NumFastIselRemaining; + continue; + } + + // Bottom-up: reset the insert pos at the top, after any local-value + // instructions. + FastIS->recomputeInsertPt(); + + // Try to select the instruction with FastISel. + if (FastIS->SelectInstruction(Inst)) { + --NumFastIselRemaining; + ++NumFastIselSuccess; + // If fast isel succeeded, skip over all the folded instructions, and + // then see if there is a load right before the selected instructions. + // Try to fold the load if so. + const Instruction *BeforeInst = Inst; + while (BeforeInst != Begin) { + BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + break; + } + if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) { + // If we succeeded, don't re-select the load. + BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); + --NumFastIselRemaining; + ++NumFastIselSuccess; + } + continue; + } + +#ifndef NDEBUG + if (EnableFastISelVerbose2) + collectFailStats(Inst); +#endif + + // Then handle certain instructions as single-LLVM-Instruction blocks. + if (isa<CallInst>(Inst)) { + + if (EnableFastISelVerbose || EnableFastISelAbort) { + dbgs() << "FastISel missed call: "; + Inst->dump(); + } + + if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { + unsigned &R = FuncInfo->ValueMap[Inst]; + if (!R) + R = FuncInfo->CreateRegs(Inst->getType()); + } + + bool HadTailCall = false; + MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; + SelectBasicBlock(Inst, BI, HadTailCall); + + // If the call was emitted as a tail call, we're done with the block. + // We also need to delete any previously emitted instructions. + if (HadTailCall) { + FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end()); + --BI; + break; + } + + // Recompute NumFastIselRemaining as Selection DAG instruction + // selection may have handled the call, input args, etc. + unsigned RemainingNow = std::distance(Begin, BI); + NumFastIselFailures += NumFastIselRemaining - RemainingNow; + NumFastIselRemaining = RemainingNow; + continue; + } + + if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { + // Don't abort, and use a different message for terminator misses. + NumFastIselFailures += NumFastIselRemaining; + if (EnableFastISelVerbose || EnableFastISelAbort) { + dbgs() << "FastISel missed terminator: "; + Inst->dump(); + } + } else { + NumFastIselFailures += NumFastIselRemaining; + if (EnableFastISelVerbose || EnableFastISelAbort) { + dbgs() << "FastISel miss: "; + Inst->dump(); + } + if (EnableFastISelAbort) + // The "fast" selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + llvm_unreachable("FastISel didn't select the entire block"); + } + break; + } + + FastIS->recomputeInsertPt(); + } else { + // Lower any arguments needed in this block if this is the entry block. + if (LLVMBB == &Fn.getEntryBlock()) + LowerArguments(Fn); + } + + if (Begin != BI) + ++NumDAGBlocks; + else + ++NumFastIselBlocks; + + if (Begin != BI) { + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. 
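//===-- Sketch: fast path with batch fallback (illustrative) -------------===//
// The hand-off below captures the overall strategy: FastISel consumes
// instructions from the bottom of the block for as long as it succeeds, and
// whatever range it could not handle ([Begin, BI)) is passed to the slower
// but complete SelectionDAG path. The control shape, abstracted with
// invented function-pointer parameters:
#if 0 // illustrative sketch only; not part of the imported source
static void selectRange(int *Begin, int *End,
                        bool (*TryFast)(int),
                        void (*SelectDAG)(int *, int *)) {
  int *BI = End;
  while (BI != Begin && TryFast(*(BI - 1)))
    --BI;                   // fast path consumed the bottom-most instruction
  if (Begin != BI)
    SelectDAG(Begin, BI);   // batch-select the unconsumed prefix
}
#endif
//===----------------------------------------------------------------------===//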
+ bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); + } + + FinishBasicBlock(); + FuncInfo->PHINodesToUpdate.clear(); + } + + delete FastIS; + SDB->clearDanglingDebugInfo(); +} + +void +SelectionDAGISel::FinishBasicBlock() { + + DEBUG(dbgs() << "Total amount of phi nodes to update: " + << FuncInfo->PHINodesToUpdate.size() << "\n"; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) + dbgs() << "Node " << i << " : (" + << FuncInfo->PHINodesToUpdate[i].first + << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); + + // Next, now that we know what the last MBB the LLVM BB expanded is, update + // PHI nodes in successors. + if (SDB->SwitchCases.empty() && + SDB->JTCases.empty() && + SDB->BitTestCases.empty()) { + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) + continue; + PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); + } + return; + } + + for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDB->BitTestCases[i].Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->BitTestCases[i].Parent; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + uint32_t UnhandledWeight = 0; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) + UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; + + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { + UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + if (j+1 != ej) + SDB->visitBitTestCase(SDB->BitTestCases[i], + SDB->BitTestCases[i].Cases[j+1].ThisBB, + UnhandledWeight, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j], + FuncInfo->MBB); + else + SDB->visitBitTestCase(SDB->BitTestCases[i], + SDB->BitTestCases[i].Default, + UnhandledWeight, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j], + FuncInfo->MBB); + + + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Update PHI Nodes + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + // This is "default" BB. We have two jumps to it. From "header" BB and + // from last "case" BB. + if (PHIBB == SDB->BitTestCases[i].Default) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->BitTestCases[i].Parent) + .addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB); + // One of "cases" BB. 
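+      // The PHI may instead (or also) live in a block reached from one of the
+      // case blocks; add one operand per case BB that branches to it.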
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); + j != ej; ++j) { + MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; + if (cBB->isSuccessor(PHIBB)) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB); + } + } + } + SDB->BitTestCases.clear(); + + // If the JumpTable record is filled in, then we need to emit a jump table. + // Updating the PHI nodes is tricky in this case, since we need to determine + // whether the PHI is a successor of the range check MBB or the jump table MBB + for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDB->JTCases[i].first.Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, + FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->JTCases[i].second.MBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitJumpTable(SDB->JTCases[i].second); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // Update PHI Nodes + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + // "default" BB. We can go there only from header BB. + if (PHIBB == SDB->JTCases[i].second.Default) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->JTCases[i].first.HeaderBB); + // JT BB. Just iterate over successors here + if (FuncInfo->MBB->isSuccessor(PHIBB)) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB); + } + } + SDB->JTCases.clear(); + + // If the switch block involved a branch to one of the actual successors, we + // need to update PHI nodes in that block. + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + if (FuncInfo->MBB->isSuccessor(PHI->getParent())) + PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); + } + + // If we generated any switch lowering information, build and codegen any + // additional DAGs necessary. + for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + + // Determine the unique successors. + SmallVector<MachineBasicBlock *, 2> Succs; + Succs.push_back(SDB->SwitchCases[i].TrueBB); + if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) + Succs.push_back(SDB->SwitchCases[i].FalseBB); + + // Emit the code. Note that this could result in FuncInfo->MBB being split. + SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // Remember the last block, now that any splitting is done, for use in + // populating PHI nodes in successors. 
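+    // (CodeGenAndEmitDAG may have split FuncInfo->MBB while emitting the
+    // compare-and-branch, so re-read it here: the PHI operands added below
+    // must name the block that actually ends with the branch.)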
+ MachineBasicBlock *ThisBB = FuncInfo->MBB; + + // Handle any PHI nodes in successors of this chunk, as if we were coming + // from the original BB before switch expansion. Note that PHI nodes can + // occur multiple times in PHINodesToUpdate. We have to be very careful to + // handle them the right number of times. + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + FuncInfo->MBB = Succs[i]; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // FuncInfo->MBB may have been removed from the CFG if a branch was + // constant folded. + if (ThisBB->isSuccessor(FuncInfo->MBB)) { + for (MachineBasicBlock::iterator + MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end(); + MBBI != MBBE && MBBI->isPHI(); ++MBBI) { + MachineInstrBuilder PHI(*MF, MBBI); + // This value for this PHI node is recorded in PHINodesToUpdate. + for (unsigned pn = 0; ; ++pn) { + assert(pn != FuncInfo->PHINodesToUpdate.size() && + "Didn't find PHI entry!"); + if (FuncInfo->PHINodesToUpdate[pn].first == PHI) { + PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB); + break; + } + } + } + } + } + } + SDB->SwitchCases.clear(); +} + + +/// Create the scheduler. If a specific scheduler was specified +/// via the SchedulerRegistry, use it, otherwise select the +/// one preferred by the target. +/// +ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { + RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); + + if (!Ctor) { + Ctor = ISHeuristic; + RegisterScheduler::setDefault(Ctor); + } + + return Ctor(this, OptLevel); +} + +//===----------------------------------------------------------------------===// +// Helper functions used by the generated instruction selector. +//===----------------------------------------------------------------------===// +// Calls to these methods are generated by tblgen. + +/// CheckAndMask - The isel is trying to match something like (and X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + +/// CheckOrMask - The isel is trying to match something like (or X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! 
+  if (ActualMask == DesiredMask)
+    return true;
+
+  // If the actual OR mask sets bits outside the desired mask, this doesn't
+  // match.
+  if (ActualMask.intersects(~DesiredMask))
+    return false;
+
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already one or is not demanded.  Check for known one input bits.
+  APInt NeededMask = DesiredMask & ~ActualMask;
+
+  APInt KnownZero, KnownOne;
+  CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne);
+
+  // If all the missing bits in the OR are already known to be set, match!
+  if ((NeededMask & KnownOne) == NeededMask)
+    return true;
+
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen.  Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+  std::vector<SDValue> InOps;
+  std::swap(InOps, Ops);
+
+  Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+  Ops.push_back(InOps[InlineAsm::Op_AsmString]);  // 1
+  Ops.push_back(InOps[InlineAsm::Op_MDNode]);     // 2, !srcloc
+  Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]);  // 3 (SideEffect, AlignStack)
+
+  unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
+  if (InOps[e-1].getValueType() == MVT::Glue)
+    --e;  // Don't process a glue operand if it is here.
+
+  while (i != e) {
+    unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+    if (!InlineAsm::isMemKind(Flags)) {
+      // Just skip over this operand, copying the operands verbatim.
+      Ops.insert(Ops.end(), InOps.begin()+i,
+                 InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+      i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+    } else {
+      assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+             "Memory operand with multiple values?");
+      // Otherwise, this is a memory operand.  Ask the target to select it.
+      std::vector<SDValue> SelOps;
+      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+        report_fatal_error("Could not match memory address.  Inline asm"
+                           " failure!");
+
+      // Add this to the output node.
+      unsigned NewFlags =
+        InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+      Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32));
+      Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+      i += 2;
+    }
+  }
+
+  // Add the glue input back if present.
+  if (e != InOps.size())
+    Ops.push_back(InOps.back());
+}
+
+/// findGlueUse - Return the use of the MVT::Glue value produced by the
+/// specified SDNode.
+///
+static SDNode *findGlueUse(SDNode *N) {
+  unsigned FlagResNo = N->getNumValues()-1;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDUse &Use = I.getUse();
+    if (Use.getResNo() == FlagResNo)
+      return Use.getUser();
+  }
+  return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+                          SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+                          bool IgnoreChains) {
+  // Node IDs are assigned such that a node's ID is guaranteed to be greater
+  // than the IDs of all of its (recursive) operands.  If we scan to a point
+  // where 'use' is smaller than the node we're scanning for, then we know we
+  // will never find it.
+  //
+  // The Use's node ID may be -1 (unassigned) if it is a newly allocated node.
+  // This can happen because we scan down to newly selected nodes in the case
+  // of glue uses.
+  if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)
+    return false;
+
+  // Don't revisit a node if we've already scanned it and didn't fail; we know
+  // we won't fail if we scan it again.
+  if (!Visited.insert(Use))
+    return false;
+
+  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+    // Ignore chain uses, they are validated by HandleMergeInputChains.
+    if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+      continue;
+
+    SDNode *N = Use->getOperand(i).getNode();
+    if (N == Def) {
+      if (Use == ImmedUse || Use == Root)
+        continue;  // We are not looking for immediate use.
+      assert(N != Root);
+      return true;
+    }
+
+    // Traverse up the operand chain.
+    if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
+      return true;
+  }
+  return false;
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+                                          SDNode *Root) const {
+  if (OptLevel == CodeGenOpt::None) return false;
+  return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+                                     CodeGenOpt::Level OptLevel,
+                                     bool IgnoreChains) {
+  if (OptLevel == CodeGenOpt::None) return false;
+
+  // If the Root use can somehow reach N through a path that doesn't contain
+  // U, then folding N would create a cycle.  For example, in the following
+  // diagram, Root can reach N through X.  If N is folded into Root, then
+  // X is both a predecessor and a successor of U.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^     ^          //
+  //         \   /           //
+  //          \ /            //
+  //         [Root*]         //
+  //
+  // * indicates nodes to be folded together.
+  //
+  // If Root produces glue, then it gets (even more) interesting.  Since it
+  // will be "glued" together with its glue use in the scheduler, we need to
+  // check if it might reach N.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^       ^        //
+  //         \       \       //
+  //          \      |       //
+  //         [Root*] |       //
+  //          ^      |       //
+  //          f      |       //
+  //          |      /       //
+  //         [Y]    /        //
+  //           ^   /         //
+  //           f  /          //
+  //           | /           //
+  //          [GU]           //
+  //
+  // If GU (glue use) indirectly reaches N (the load), and Root folds N
+  // (call it Fold), then X is a predecessor of GU and a successor of
+  // Fold.  But since Fold and GU are glued together, this will create
+  // a cycle in the scheduling graph.
+
+  // If the node has glue, walk down the graph to the "lowest" node in the
+  // glued set.
+  EVT VT = Root->getValueType(Root->getNumValues()-1);
+  while (VT == MVT::Glue) {
+    SDNode *GU = findGlueUse(Root);
+    if (GU == NULL)
+      break;
+    Root = GU;
+    VT = Root->getValueType(Root->getNumValues()-1);
+
+    // If our query node has a glue result with a use, we've walked up it.  If
+    // the user (which has already been selected) has a chain or indirectly uses
+    // the chain, our WalkChainUsers predicate will not consider it.  Because of
+    // this, we cannot ignore chains in this predicate.
+ IgnoreChains = false; + } + + + SmallPtrSet<SDNode*, 16> Visited; + return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); +} + +SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { + std::vector<SDValue> Ops(N->op_begin(), N->op_end()); + SelectInlineAsmMemoryOperands(Ops); + + EVT VTs[] = { MVT::Other, MVT::Glue }; + SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + VTs, &Ops[0], Ops.size()); + New->setNodeId(-1); + return New.getNode(); +} + +SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { + return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); +} + +/// GetVBR - decode a vbr encoding whose top bit is set. +LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t +GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { + assert(Val >= 128 && "Not a VBR"); + Val &= 127; // Remove first vbr bit. + + unsigned Shift = 7; + uint64_t NextBits; + do { + NextBits = MatcherTable[Idx++]; + Val |= (NextBits&127) << Shift; + Shift += 7; + } while (NextBits & 128); + + return Val; +} + + +/// UpdateChainsAndGlue - When a match is complete, this method updates uses of +/// interior glue and chain results to use the new glue and chain results. +void SelectionDAGISel:: +UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode*> &ChainNodesMatched, + SDValue InputGlue, + const SmallVectorImpl<SDNode*> &GlueResultNodesMatched, + bool isMorphNodeTo) { + SmallVector<SDNode*, 4> NowDeadNodes; + + // Now that all the normal results are replaced, we replace the chain and + // glue results if present. + if (!ChainNodesMatched.empty()) { + assert(InputChain.getNode() != 0 && + "Matched input chains but didn't produce a chain"); + // Loop over all of the nodes we matched that produced a chain result. + // Replace all the chain results with the final chain we ended up with. + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + SDNode *ChainNode = ChainNodesMatched[i]; + + // If this node was already deleted, don't look at it. + if (ChainNode->getOpcode() == ISD::DELETED_NODE) + continue; + + // Don't replace the results of the root node if we're doing a + // MorphNodeTo. + if (ChainNode == NodeToMatch && isMorphNodeTo) + continue; + + SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); + if (ChainVal.getValueType() == MVT::Glue) + ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); + assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); + CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); + + // If the node became dead and we haven't already seen it, delete it. + if (ChainNode->use_empty() && + !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) + NowDeadNodes.push_back(ChainNode); + } + } + + // If the result produces glue, update any glue results in the matched + // pattern with the glue result. + if (InputGlue.getNode() != 0) { + // Handle any interior nodes explicitly marked. + for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { + SDNode *FRN = GlueResultNodesMatched[i]; + + // If this node was already deleted, don't look at it. + if (FRN->getOpcode() == ISD::DELETED_NODE) + continue; + + assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue && + "Doesn't have a glue result"); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), + InputGlue); + + // If the node became dead and we haven't already seen it, delete it. 
+      if (FRN->use_empty() &&
+          !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
+        NowDeadNodes.push_back(FRN);
+    }
+  }
+
+  if (!NowDeadNodes.empty())
+    CurDAG->RemoveDeadNodes(NowDeadNodes);
+
+  DEBUG(errs() << "ISEL: Match complete!\n");
+}
+
+enum ChainResult {
+  CR_Simple,
+  CR_InducesCycle,
+  CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevents folding
+/// because it would induce a cycle) and whether we have a TokenFactor node
+/// sandwiched between pattern nodes (in which case the TF becomes part of the
+/// pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down
+/// to already selected nodes "below" us.
+static ChainResult
+WalkChainUsers(const SDNode *ChainedNode,
+               SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
+               SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+  ChainResult Result = CR_Simple;
+
+  for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+         E = ChainedNode->use_end(); UI != E; ++UI) {
+    // Make sure the use is of the chain, not some other value we produce.
+    if (UI.getUse().getValueType() != MVT::Other) continue;
+
+    SDNode *User = *UI;
+
+    // If we see an already-selected machine node, then we've gone beyond the
+    // pattern that we're selecting down into the already selected chunk of the
+    // DAG.
+    if (User->isMachineOpcode() ||
+        User->getOpcode() == ISD::HANDLENODE)  // Root of the graph.
+      continue;
+
+    unsigned UserOpcode = User->getOpcode();
+    if (UserOpcode == ISD::CopyToReg ||
+        UserOpcode == ISD::CopyFromReg ||
+        UserOpcode == ISD::INLINEASM ||
+        UserOpcode == ISD::EH_LABEL ||
+        UserOpcode == ISD::LIFETIME_START ||
+        UserOpcode == ISD::LIFETIME_END) {
+      // If their node ID got reset to -1 then they've already been selected.
+      // Treat them like a MachineOpcode.
+      if (User->getNodeId() == -1)
+        continue;
+    }
+
+    // If we have a TokenFactor, we handle it specially.
+    if (User->getOpcode() != ISD::TokenFactor) {
+      // If the node isn't a token factor and isn't part of our pattern, then
+      // it must be a random chained node in between two nodes we're selecting.
+      // This happens when we have something like:
+      //   x = load ptr
+      //   call
+      //   y = x+4
+      //   store y -> ptr
+      // Because we structurally match the load/store as a read/modify/write,
+      // but the call is chained between them.  We cannot fold in this case
+      // because it would induce a cycle in the graph.
+      if (!std::count(ChainedNodesInPattern.begin(),
+                      ChainedNodesInPattern.end(), User))
+        return CR_InducesCycle;
+
+      // Otherwise we found a node that is part of our pattern.  For example in:
+      //   x = load ptr
+      //   y = x+4
+      //   store y -> ptr
+      // This would happen when we're scanning down from the load and see the
+      // store as a user.  Record that there is a use of ChainedNode that is
+      // part of the pattern and keep scanning uses.
+      Result = CR_LeadsToInteriorNode;
+      InteriorChainedNodes.push_back(User);
+      continue;
+    }
+
+    // If we found a TokenFactor, there are two cases to consider: first if the
+    // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+    // uses of the TF are in our pattern) we just want to ignore it.
+    // Second, the TokenFactor can be sandwiched in between two chained nodes,
+    // like so:
+    //      [Load chain]
+    //          ^
+    //          |
+    //        [Load]
+    //        ^    ^
+    //        |    \                    DAG's like cheese
+    //       /       \                       do you?
+    //      /         |
+    // [TokenFactor] [Op]
+    //     ^          ^
+    //     |          |
+    //      \        /
+    //       \      /
+    //       [Store]
+    //
+    // In this case, the TokenFactor becomes part of our match and we rewrite it
+    // as a new TokenFactor.
+    //
+    // To distinguish these two cases, do a recursive walk down the uses.
+    switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+    case CR_Simple:
+      // If the uses of the TokenFactor are just already-selected nodes, ignore
+      // it, it is "below" our pattern.
+      continue;
+    case CR_InducesCycle:
+      // If the uses of the TokenFactor lead to nodes that are neither part of
+      // our pattern nor already selected, folding would turn this into a
+      // cycle, bail out now.
+      return CR_InducesCycle;
+    case CR_LeadsToInteriorNode:
+      break;  // Otherwise, keep processing.
+    }
+
+    // Okay, we know we're in the interesting interior case.  The TokenFactor
+    // is now going to be considered part of the pattern so that we rewrite its
+    // uses (it may have uses that are not part of the pattern) with the
+    // ultimate chain result of the generated code.  We will also add its chain
+    // inputs as inputs to the ultimate TokenFactor we create.
+    Result = CR_LeadsToInteriorNode;
+    ChainedNodesInPattern.push_back(User);
+    InteriorChainedNodes.push_back(User);
+    continue;
+  }
+
+  return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain.  The
+/// input vector contains a list of all of the chained nodes that we match.  We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and if so, create a TokenFactor node that will
+/// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+                       SelectionDAG *CurDAG) {
+  // Walk all of the chained nodes we've matched, recursively scanning down the
+  // users of the chain result. This adds any TokenFactor nodes that are caught
+  // in between chained nodes to the chained and interior nodes list.
+  SmallVector<SDNode*, 3> InteriorChainedNodes;
+  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+    if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+                       InteriorChainedNodes) == CR_InducesCycle)
+      return SDValue(); // Would induce a cycle.
+  }
+
+  // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+  // that we are interested in.  Form our input TokenFactor node.
+  SmallVector<SDValue, 3> InputChains;
+  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+    // Add the input chain of this node to the InputChains list (which will be
+    // the operands of the generated TokenFactor) if it's not an interior node.
+    SDNode *N = ChainNodesMatched[i];
+    if (N->getOpcode() != ISD::TokenFactor) {
+      if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+        continue;
+
+      // Otherwise, add the input chain.
+      SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+      assert(InChain.getValueType() == MVT::Other && "Not a chain");
+      InputChains.push_back(InChain);
+      continue;
+    }
+
+    // If we have a token factor, we want to add all inputs of the token factor
+    // that are not part of the pattern we're matching.
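+    // For example, if this TokenFactor merges the chain of a matched load
+    // with the chain of some unrelated node, only the unrelated operand is
+    // forwarded into InputChains below; the matched load's chain is already
+    // covered by the pattern.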
+    for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+      if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+                      N->getOperand(op).getNode()))
+        InputChains.push_back(N->getOperand(op));
+    }
+  }
+
+  if (InputChains.size() == 1)
+    return InputChains[0];
+  return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
+                         MVT::Other, &InputChains[0], InputChains.size());
+}
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+          const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+  // It is possible we're using MorphNodeTo to replace a node with no
+  // normal results with one that has a normal result (or we could be
+  // adding a chain) and the input could have glue and chains as well.
+  // In this case we need to shift the operands down.
+  // FIXME: This is a horrible hack and broken in obscure cases, no worse
+  // than the old isel though.
+  int OldGlueResultNo = -1, OldChainResultNo = -1;
+
+  unsigned NTMNumResults = Node->getNumValues();
+  if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+    OldGlueResultNo = NTMNumResults-1;
+    if (NTMNumResults != 1 &&
+        Node->getValueType(NTMNumResults-2) == MVT::Other)
+      OldChainResultNo = NTMNumResults-2;
+  } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+    OldChainResultNo = NTMNumResults-1;
+
+  // Call the underlying SelectionDAG routine to do the transmogrification.
+  // Note that this deletes operands of the old node that become dead.
+  SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+
+  // MorphNodeTo can operate in two ways: if an existing node with the
+  // specified operands exists, it can just return it.  Otherwise, it
+  // updates the node in place to have the requested operands.
+  if (Res == Node) {
+    // If we updated the node in place, reset the node ID.  To the isel,
+    // this should be just like a newly allocated machine node.
+    Res->setNodeId(-1);
+  }
+
+  unsigned ResNumResults = Res->getNumValues();
+  // Move the glue if needed.
+  if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+      (unsigned)OldGlueResultNo != ResNumResults-1)
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
+                                      SDValue(Res, ResNumResults-1));
+
+  if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
+    --ResNumResults;
+
+  // Move the chain reference if needed.
+  if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+      (unsigned)OldChainResultNo != ResNumResults-1)
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+                                      SDValue(Res, ResNumResults-1));
+
+  // Otherwise, no replacement happened because the node already exists.
+  // Replace uses of the old node with the new one.
+  if (Res != Node)
+    CurDAG->ReplaceAllUsesWith(Node, Res);
+
+  return Res;
+}
+
+/// CheckSame - Implements OP_CheckSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+          SDValue N,
+          const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+  // Accept if it is exactly the same as a previously recorded node.
+  unsigned RecNo = MatcherTable[MatcherIndex++];
+  assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+  return N == RecordedNodes[RecNo].first;
+}
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + const SelectionDAGISel &SDISel) { + return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); +} + +/// CheckNodePredicate - Implements OP_CheckNodePredicate. +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + const SelectionDAGISel &SDISel, SDNode *N) { + return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDNode *N) { + uint16_t Opc = MatcherTable[MatcherIndex++]; + Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + return N->getOpcode() == Opc; +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (N.getValueType() == VT) return true; + + // Handle the case when VT is iPTR. + return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy(); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); +} + + +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + return cast<CondCodeSDNode>(N)->get() == + (ISD::CondCode)MatcherTable[MatcherIndex++]; +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (cast<VTSDNode>(N)->getVT() == VT) + return true; + + // Handle the case when VT is iPTR. 
+  return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+             SDValue N) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+  return C != 0 && C->getSExtValue() == Val;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+            SDValue N, const SelectionDAGISel &SDISel) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+  if (N->getOpcode() != ISD::AND) return false;
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+           SDValue N, const SelectionDAGISel &SDISel) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+  if (N->getOpcode() != ISD::OR) return false;
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current node.  If the current predicate is known to
+/// fail, set Result=true and return anything.  Otherwise (the predicate is
+/// known to pass, or we cannot tell), set Result=false and return the
+/// MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+                                       unsigned Index, SDValue N,
+                                       bool &Result,
+                                       const SelectionDAGISel &SDISel,
+                 SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+  switch (Table[Index++]) {
+  default:
+    Result = false;
+    return Index-1;  // Could not evaluate this predicate.
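+  // Note the inverted sense in every case below: each ::Check* helper returns
+  // true when its check passes, while Result must be true when the predicate
+  // is known to *fail*; hence the leading '!'.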
+ case SelectionDAGISel::OPC_CheckSame: + Result = !::CheckSame(Table, Index, N, RecordedNodes); + return Index; + case SelectionDAGISel::OPC_CheckPatternPredicate: + Result = !::CheckPatternPredicate(Table, Index, SDISel); + return Index; + case SelectionDAGISel::OPC_CheckPredicate: + Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode()); + return Index; + case SelectionDAGISel::OPC_CheckOpcode: + Result = !::CheckOpcode(Table, Index, N.getNode()); + return Index; + case SelectionDAGISel::OPC_CheckType: + Result = !::CheckType(Table, Index, N, SDISel.TLI); + return Index; + case SelectionDAGISel::OPC_CheckChild0Type: + case SelectionDAGISel::OPC_CheckChild1Type: + case SelectionDAGISel::OPC_CheckChild2Type: + case SelectionDAGISel::OPC_CheckChild3Type: + case SelectionDAGISel::OPC_CheckChild4Type: + case SelectionDAGISel::OPC_CheckChild5Type: + case SelectionDAGISel::OPC_CheckChild6Type: + case SelectionDAGISel::OPC_CheckChild7Type: + Result = !::CheckChildType(Table, Index, N, SDISel.TLI, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type); + return Index; + case SelectionDAGISel::OPC_CheckCondCode: + Result = !::CheckCondCode(Table, Index, N); + return Index; + case SelectionDAGISel::OPC_CheckValueType: + Result = !::CheckValueType(Table, Index, N, SDISel.TLI); + return Index; + case SelectionDAGISel::OPC_CheckInteger: + Result = !::CheckInteger(Table, Index, N); + return Index; + case SelectionDAGISel::OPC_CheckAndImm: + Result = !::CheckAndImm(Table, Index, N, SDISel); + return Index; + case SelectionDAGISel::OPC_CheckOrImm: + Result = !::CheckOrImm(Table, Index, N, SDISel); + return Index; + } +} + +namespace { + +struct MatchScope { + /// FailIndex - If this match fails, this is the index to continue with. + unsigned FailIndex; + + /// NodeStack - The node stack when the scope was formed. + SmallVector<SDValue, 4> NodeStack; + + /// NumRecordedNodes - The number of recorded nodes when the scope was formed. + unsigned NumRecordedNodes; + + /// NumMatchedMemRefs - The number of matched memref entries. + unsigned NumMatchedMemRefs; + + /// InputChain/InputGlue - The current chain/glue + SDValue InputChain, InputGlue; + + /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. + bool HasChainNodesMatched, HasGlueResultNodesMatched; +}; + +} + +SDNode *SelectionDAGISel:: +SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, + unsigned TableSize) { + // FIXME: Should these even be selected? Handle these cases in the caller? + switch (NodeToMatch->getOpcode()) { + default: + break; + case ISD::EntryToken: // These nodes remain the same. + case ISD::BasicBlock: + case ISD::Register: + case ISD::RegisterMask: + //case ISD::VALUETYPE: + //case ISD::CONDCODE: + case ISD::HANDLENODE: + case ISD::MDNODE_SDNODE: + case ISD::TargetConstant: + case ISD::TargetConstantFP: + case ISD::TargetConstantPool: + case ISD::TargetFrameIndex: + case ISD::TargetExternalSymbol: + case ISD::TargetBlockAddress: + case ISD::TargetJumpTable: + case ISD::TargetGlobalTLSAddress: + case ISD::TargetGlobalAddress: + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::EH_LABEL: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: + NodeToMatch->setNodeId(-1); // Mark selected. 
+ return 0; + case ISD::AssertSext: + case ISD::AssertZext: + CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), + NodeToMatch->getOperand(0)); + return 0; + case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); + case ISD::UNDEF: return Select_UNDEF(NodeToMatch); + } + + assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); + + // Set up the node stack with NodeToMatch as the only node on the stack. + SmallVector<SDValue, 8> NodeStack; + SDValue N = SDValue(NodeToMatch, 0); + NodeStack.push_back(N); + + // MatchScopes - Scopes used when matching, if a match failure happens, this + // indicates where to continue checking. + SmallVector<MatchScope, 8> MatchScopes; + + // RecordedNodes - This is the set of nodes that have been recorded by the + // state machine. The second value is the parent of the node, or null if the + // root is recorded. + SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes; + + // MatchedMemRefs - This is the set of MemRef's we've seen in the input + // pattern. + SmallVector<MachineMemOperand*, 2> MatchedMemRefs; + + // These are the current input chain and glue for use when generating nodes. + // Various Emit operations change these. For example, emitting a copytoreg + // uses and updates these. + SDValue InputChain, InputGlue; + + // ChainNodesMatched - If a pattern matches nodes that have input/output + // chains, the OPC_EmitMergeInputChains operation is emitted which indicates + // which ones they are. The result is captured into this list so that we can + // update the chain results when the pattern is complete. + SmallVector<SDNode*, 3> ChainNodesMatched; + SmallVector<SDNode*, 3> GlueResultNodesMatched; + + DEBUG(errs() << "ISEL: Starting pattern match on root node: "; + NodeToMatch->dump(CurDAG); + errs() << '\n'); + + // Determine where to start the interpreter. Normally we start at opcode #0, + // but if the state machine starts with an OPC_SwitchOpcode, then we + // accelerate the first lookup (which is guaranteed to be hot) with the + // OpcodeOffset table. + unsigned MatcherIndex = 0; + + if (!OpcodeOffset.empty()) { + // Already computed the OpcodeOffset table, just index into it. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n"); + + } else if (MatcherTable[0] == OPC_SwitchOpcode) { + // Otherwise, the table isn't computed, but the state machine does start + // with an OPC_SwitchOpcode instruction. Populate the table now, since this + // is the first time we're selecting an instruction. + unsigned Idx = 1; + while (1) { + // Get the size of this case. + unsigned CaseSize = MatcherTable[Idx++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, Idx); + if (CaseSize == 0) break; + + // Get the opcode, add the index to the table. + uint16_t Opc = MatcherTable[Idx++]; + Opc |= (unsigned short)MatcherTable[Idx++] << 8; + if (Opc >= OpcodeOffset.size()) + OpcodeOffset.resize((Opc+1)*2); + OpcodeOffset[Opc] = Idx; + Idx += CaseSize; + } + + // Okay, do the lookup for the first opcode. 
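+    // (Subsequent selections take the branch above instead and index the
+    // now-populated OpcodeOffset table directly.)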
+ if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + } + + while (1) { + assert(MatcherIndex < TableSize && "Invalid index"); +#ifndef NDEBUG + unsigned CurrentOpcodeIndex = MatcherIndex; +#endif + BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++]; + switch (Opcode) { + case OPC_Scope: { + // Okay, the semantics of this operation are that we should push a scope + // then evaluate the first child. However, pushing a scope only to have + // the first check fail (which then pops it) is inefficient. If we can + // determine immediately that the first check (or first several) will + // immediately fail, don't even bother pushing a scope for them. + unsigned FailIndex; + + while (1) { + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + // Found the end of the scope with no match. + if (NumToSkip == 0) { + FailIndex = 0; + break; + } + + FailIndex = MatcherIndex+NumToSkip; + + unsigned MatcherIndexOfPredicate = MatcherIndex; + (void)MatcherIndexOfPredicate; // silence warning. + + // If we can't evaluate this predicate without pushing a scope (e.g. if + // it is a 'MoveParent') or if the predicate succeeds on this node, we + // push the scope and evaluate the full predicate chain. + bool Result; + MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N, + Result, *this, RecordedNodes); + if (!Result) + break; + + DEBUG(errs() << " Skipped scope entry (due to false predicate) at " + << "index " << MatcherIndexOfPredicate + << ", continuing at " << FailIndex << "\n"); + ++NumDAGIselRetries; + + // Otherwise, we know that this case of the Scope is guaranteed to fail, + // move to the next case. + MatcherIndex = FailIndex; + } + + // If the whole scope failed to match, bail. + if (FailIndex == 0) break; + + // Push a MatchScope which indicates where to go if the first child fails + // to match. + MatchScope NewEntry; + NewEntry.FailIndex = FailIndex; + NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end()); + NewEntry.NumRecordedNodes = RecordedNodes.size(); + NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); + NewEntry.InputChain = InputChain; + NewEntry.InputGlue = InputGlue; + NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); + NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty(); + MatchScopes.push_back(NewEntry); + continue; + } + case OPC_RecordNode: { + // Remember this node, it may end up being an operand in the pattern. + SDNode *Parent = 0; + if (NodeStack.size() > 1) + Parent = NodeStack[NodeStack.size()-2].getNode(); + RecordedNodes.push_back(std::make_pair(N, Parent)); + continue; + } + + case OPC_RecordChild0: case OPC_RecordChild1: + case OPC_RecordChild2: case OPC_RecordChild3: + case OPC_RecordChild4: case OPC_RecordChild5: + case OPC_RecordChild6: case OPC_RecordChild7: { + unsigned ChildNo = Opcode-OPC_RecordChild0; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + + RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo), + N.getNode())); + continue; + } + case OPC_RecordMemRef: + MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand()); + continue; + + case OPC_CaptureGlueInput: + // If the current node has an input glue, capture it in InputGlue. 
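+      // A glue input, when present, is always the last operand, so only that
+      // position needs to be checked.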
+ if (N->getNumOperands() != 0 && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) + InputGlue = N->getOperand(N->getNumOperands()-1); + continue; + + case OPC_MoveChild: { + unsigned ChildNo = MatcherTable[MatcherIndex++]; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + N = N.getOperand(ChildNo); + NodeStack.push_back(N); + continue; + } + + case OPC_MoveParent: + // Pop the current node off the NodeStack. + NodeStack.pop_back(); + assert(!NodeStack.empty() && "Node stack imbalance!"); + N = NodeStack.back(); + continue; + + case OPC_CheckSame: + if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; + continue; + case OPC_CheckPatternPredicate: + if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; + continue; + case OPC_CheckPredicate: + if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this, + N.getNode())) + break; + continue; + case OPC_CheckComplexPat: { + unsigned CPNum = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, + RecordedNodes[RecNo].first, CPNum, + RecordedNodes)) + break; + continue; + } + case OPC_CheckOpcode: + if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; + continue; + + case OPC_CheckType: + if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; + continue; + + case OPC_SwitchOpcode: { + unsigned CurNodeOpcode = N.getOpcode(); + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (1) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + uint16_t Opc = MatcherTable[MatcherIndex++]; + Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + + // If the opcode matches, then we will execute this case. + if (CurNodeOpcode == Opc) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. + DEBUG(errs() << " OpcodeSwitch from " << SwitchStart + << " to " << MatcherIndex << "\n"); + continue; + } + + case OPC_SwitchType: { + MVT CurNodeVT = N.getValueType().getSimpleVT(); + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (1) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (CaseVT == MVT::iPTR) + CaseVT = TLI.getPointerTy(); + + // If the VT matches, then we will execute this case. + if (CurNodeVT == CaseVT) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. 
+ DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); + continue; + } + case OPC_CheckChild0Type: case OPC_CheckChild1Type: + case OPC_CheckChild2Type: case OPC_CheckChild3Type: + case OPC_CheckChild4Type: case OPC_CheckChild5Type: + case OPC_CheckChild6Type: case OPC_CheckChild7Type: + if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + Opcode-OPC_CheckChild0Type)) + break; + continue; + case OPC_CheckCondCode: + if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckValueType: + if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; + continue; + case OPC_CheckInteger: + if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckAndImm: + if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + case OPC_CheckOrImm: + if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + + case OPC_CheckFoldableChainNode: { + assert(NodeStack.size() != 1 && "No parent node"); + // Verify that all intermediate nodes between the root and this one have + // a single use. + bool HasMultipleUses = false; + for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) + if (!NodeStack[i].hasOneUse()) { + HasMultipleUses = true; + break; + } + if (HasMultipleUses) break; + + // Check to see that the target thinks this is profitable to fold and that + // we can fold it without inducing cycles in the graph. + if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch) || + !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch, OptLevel, + true/*We validate our own chains*/)) + break; + + continue; + } + case OPC_EmitInteger: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getTargetConstant(Val, VT), (SDNode*)0)); + continue; + } + case OPC_EmitRegister: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), (SDNode*)0)); + continue; + } + case OPC_EmitRegister2: { + // For targets w/ more than 256 register names, the register enum + // values are stored in two bytes in the matcher table (just like + // opcodes). + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RegNo |= MatcherTable[MatcherIndex++] << 8; + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), (SDNode*)0)); + continue; + } + + case OPC_EmitConvertToTarget: { + // Convert from IMM/FPIMM to target version. 
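+      // A plain ISD::Constant/ISD::ConstantFP operand would still look like
+      // something to be selected; rebuilding it with the isTarget flag (the
+      // trailing 'true' below) makes it a finished machine operand instead.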
+ unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + SDValue Imm = RecordedNodes[RecNo].first; + + if (Imm->getOpcode() == ISD::Constant) { + const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); + Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true); + } else if (Imm->getOpcode() == ISD::ConstantFP) { + const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); + Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true); + } + + RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); + continue; + } + + case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 + case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 + // These are space-optimized forms of OPC_EmitMergeInputChains. + assert(InputChain.getNode() == 0 && + "EmitMergeInputChains should be the first chain producing node"); + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. + unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? + if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].first.hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (InputChain.getNode() == 0) + break; // Failed to merge. + continue; + } + + case OPC_EmitMergeInputChains: { + assert(InputChain.getNode() == 0 && + "EmitMergeInputChains should be the first chain producing node"); + // This node gets a list of nodes we matched in the input that have + // chains. We want to token factor all of the input chains to these nodes + // together. However, if any of the input chains is actually one of the + // nodes matched in this pattern, then we have an intra-match reference. + // Ignore these because the newly token factored chain should not refer to + // the old nodes. + unsigned NumChains = MatcherTable[MatcherIndex++]; + assert(NumChains != 0 && "Can't TF zero chains"); + + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. + for (unsigned i = 0; i != NumChains; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? + if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].first.hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + } + + // If the inner loop broke out, the match fails. + if (ChainNodesMatched.empty()) + break; + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (InputChain.getNode() == 0) + break; // Failed to merge. 
+ + continue; + } + + case OPC_EmitCopyToReg: { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + + if (InputChain.getNode() == 0) + InputChain = CurDAG->getEntryNode(); + + InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(), + DestPhysReg, RecordedNodes[RecNo].first, + InputGlue); + + InputGlue = InputChain.getValue(1); + continue; + } + + case OPC_EmitNodeXForm: { + unsigned XFormNo = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0)); + continue; + } + + case OPC_EmitNode: + case OPC_MorphNodeTo: { + uint16_t TargetOpc = MatcherTable[MatcherIndex++]; + TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; + // Get the result VT list. + unsigned NumVTs = MatcherTable[MatcherIndex++]; + SmallVector<EVT, 4> VTs; + for (unsigned i = 0; i != NumVTs; ++i) { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy; + VTs.push_back(VT); + } + + if (EmitNodeInfo & OPFL_Chain) + VTs.push_back(MVT::Other); + if (EmitNodeInfo & OPFL_GlueOutput) + VTs.push_back(MVT::Glue); + + // This is hot code, so optimize the two most common cases of 1 and 2 + // results. + SDVTList VTList; + if (VTs.size() == 1) + VTList = CurDAG->getVTList(VTs[0]); + else if (VTs.size() == 2) + VTList = CurDAG->getVTList(VTs[0], VTs[1]); + else + VTList = CurDAG->getVTList(VTs.data(), VTs.size()); + + // Get the operand list. + unsigned NumOps = MatcherTable[MatcherIndex++]; + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumOps; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + if (RecNo & 128) + RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); + + assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); + Ops.push_back(RecordedNodes[RecNo].first); + } + + // If there are variadic operands to add, handle them now. + if (EmitNodeInfo & OPFL_VariadicInfo) { + // Determine the start index to copy from. + unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo); + FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0; + assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && + "Invalid variadic node"); + // Copy all of the variadic operands, not including a potential glue + // input. + for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands(); + i != e; ++i) { + SDValue V = NodeToMatch->getOperand(i); + if (V.getValueType() == MVT::Glue) break; + Ops.push_back(V); + } + } + + // If this has chain/glue inputs, add them. + if (EmitNodeInfo & OPFL_Chain) + Ops.push_back(InputChain); + if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0) + Ops.push_back(InputGlue); + + // Create the node. + SDNode *Res = 0; + if (Opcode != OPC_MorphNodeTo) { + // If this is a normal EmitNode command, just create the new node and + // add the results to the RecordedNodes list. + Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), + VTList, Ops.data(), Ops.size()); + + // Add all the non-glue/non-chain results to the RecordedNodes list. 
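+        // E.g. with VTs = {i32, Other, Glue} only the i32 value is recorded:
+        // the loop stops at the first chain/glue result, and those always
+        // come last in the list built above.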
+        for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+          if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+          RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+                                                             (SDNode*) 0));
+        }
+
+      } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) {
+        Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
+                        EmitNodeInfo);
+      } else {
+        // NodeToMatch was eliminated by CSE when the target changed the DAG.
+        // We will visit the equivalent node later.
+        DEBUG(dbgs() << "Node was eliminated by CSE\n");
+        return 0;
+      }
+
+      // If the node had chain/glue results, update our notion of the current
+      // chain and glue.
+      if (EmitNodeInfo & OPFL_GlueOutput) {
+        InputGlue = SDValue(Res, VTs.size()-1);
+        if (EmitNodeInfo & OPFL_Chain)
+          InputChain = SDValue(Res, VTs.size()-2);
+      } else if (EmitNodeInfo & OPFL_Chain)
+        InputChain = SDValue(Res, VTs.size()-1);
+
+      // If the OPFL_MemRefs glue is set on this node, slap all of the
+      // accumulated memrefs onto it.
+      //
+      // FIXME: This is vastly incorrect for patterns with multiple-output
+      // instructions that access memory and for ComplexPatterns that match
+      // loads.
+      if (EmitNodeInfo & OPFL_MemRefs) {
+        // Only attach load or store memory operands if the generated
+        // instruction may load or store.
+        const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+        bool mayLoad = MCID.mayLoad();
+        bool mayStore = MCID.mayStore();
+
+        unsigned NumMemRefs = 0;
+        for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+               MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+          if ((*I)->isLoad()) {
+            if (mayLoad)
+              ++NumMemRefs;
+          } else if ((*I)->isStore()) {
+            if (mayStore)
+              ++NumMemRefs;
+          } else {
+            ++NumMemRefs;
+          }
+        }
+
+        MachineSDNode::mmo_iterator MemRefs =
+          MF->allocateMemRefsArray(NumMemRefs);
+
+        MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
+        for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
+               MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+          if ((*I)->isLoad()) {
+            if (mayLoad)
+              *MemRefsPos++ = *I;
+          } else if ((*I)->isStore()) {
+            if (mayStore)
+              *MemRefsPos++ = *I;
+          } else {
+            *MemRefsPos++ = *I;
+          }
+        }
+
+        cast<MachineSDNode>(Res)
+          ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
+      }
+
+      DEBUG(errs() << "  "
+                   << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
+                   << " node: "; Res->dump(CurDAG); errs() << "\n");
+
+      // If this was a MorphNodeTo then we're completely done!
+      if (Opcode == OPC_MorphNodeTo) {
+        // Update chain and glue uses.
+        UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+                            InputGlue, GlueResultNodesMatched, true);
+        return Res;
+      }
+
+      continue;
+    }
+
+    case OPC_MarkGlueResults: {
+      unsigned NumNodes = MatcherTable[MatcherIndex++];
+
+      // Read and remember all the glue-result nodes.
+      for (unsigned i = 0; i != NumNodes; ++i) {
+        unsigned RecNo = MatcherTable[MatcherIndex++];
+        if (RecNo & 128)
+          RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+        assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+        GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+      }
+      continue;
+    }
+
+    case OPC_CompleteMatch: {
+      // The match has been completed, and any new nodes have been created.
+      // Patch up references to the matched dag to use the newly created
+      // nodes.
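+      // Sketch of the patch-up performed below: value i of the original node
+      // is redirected to the recorded result for slot i, e.g.
+      //
+      //   CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
+      //                                     RecordedNodes[ResSlot0].first);
+      //
+      // (ResSlot0 is illustrative; the real slots are read from the table.)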
+ unsigned NumResults = MatcherTable[MatcherIndex++]; + + for (unsigned i = 0; i != NumResults; ++i) { + unsigned ResSlot = MatcherTable[MatcherIndex++]; + if (ResSlot & 128) + ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); + + assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); + SDValue Res = RecordedNodes[ResSlot].first; + + assert(i < NodeToMatch->getNumValues() && + NodeToMatch->getValueType(i) != MVT::Other && + NodeToMatch->getValueType(i) != MVT::Glue && + "Invalid number of results to complete!"); + assert((NodeToMatch->getValueType(i) == Res.getValueType() || + NodeToMatch->getValueType(i) == MVT::iPTR || + Res.getValueType() == MVT::iPTR || + NodeToMatch->getValueType(i).getSizeInBits() == + Res.getValueType().getSizeInBits()) && + "invalid replacement"); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); + } + + // If the root node defines glue, add it to the glue nodes to update list. + if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue) + GlueResultNodesMatched.push_back(NodeToMatch); + + // Update chain and glue uses. + UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, + InputGlue, GlueResultNodesMatched, false); + + assert(NodeToMatch->use_empty() && + "Didn't replace all uses of the node?"); + + // FIXME: We just return here, which interacts correctly with SelectRoot + // above. We should fix this to not return an SDNode* anymore. + return 0; + } + } + + // If the code reached this point, then the match failed. See if there is + // another child to try in the current 'Scope', otherwise pop it until we + // find a case to check. + DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); + ++NumDAGIselRetries; + while (1) { + if (MatchScopes.empty()) { + CannotYetSelect(NodeToMatch); + return 0; + } + + // Restore the interpreter state back to the point where the scope was + // formed. + MatchScope &LastScope = MatchScopes.back(); + RecordedNodes.resize(LastScope.NumRecordedNodes); + NodeStack.clear(); + NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end()); + N = NodeStack.back(); + + if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) + MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); + MatcherIndex = LastScope.FailIndex; + + DEBUG(errs() << " Continuing at " << MatcherIndex << "\n"); + + InputChain = LastScope.InputChain; + InputGlue = LastScope.InputGlue; + if (!LastScope.HasChainNodesMatched) + ChainNodesMatched.clear(); + if (!LastScope.HasGlueResultNodesMatched) + GlueResultNodesMatched.clear(); + + // Check to see what the offset is at the new MatcherIndex. If it is zero + // we have reached the end of this scope, otherwise we have another child + // in the current scope to try. + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + + // If we have another child in this scope to match, update FailIndex and + // try it. + if (NumToSkip != 0) { + LastScope.FailIndex = MatcherIndex+NumToSkip; + break; + } + + // End of this scope, pop it and try the next child in the containing + // scope. 
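+    // Assumed layout of a scope, matching the reads above and in GetVBR: each
+    // child of an OPC_Scope is prefixed with its VBR-encoded size in bytes,
+    // and a size of zero terminates the scope:
+    //
+    //   OPC_Scope, <size1>, <child1 ops...>, <size2>, <child2 ops...>, 0
+    //
+    // Sizes of 128 or more are rebuilt from 7-bit groups, roughly:
+    //
+    //   unsigned Val = FirstByte & 127, Shift = 7, B;
+    //   do {
+    //     B = MatcherTable[MatcherIndex++];
+    //     Val |= (B & 127) << Shift;
+    //     Shift += 7;
+    //   } while (B & 128);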
+ MatchScopes.pop_back(); + } + } +} + + + +void SelectionDAGISel::CannotYetSelect(SDNode *N) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Cannot select: "; + + if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && + N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && + N->getOpcode() != ISD::INTRINSIC_VOID) { + N->printrFull(Msg, CurDAG); + Msg << "\nIn function: " << MF->getName(); + } else { + bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; + unsigned iid = + cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); + if (iid < Intrinsic::num_intrinsics) + Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid); + else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) + Msg << "target intrinsic %" << TII->getName(iid); + else + Msg << "unknown intrinsic #" << iid; + } + report_fatal_error(Msg.str()); +} + +char SelectionDAGISel::ID = 0; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp new file mode 100644 index 0000000..b752b482 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -0,0 +1,299 @@ +//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::viewGraph method. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +namespace llvm { + template<> + struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { + + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} + + static bool hasEdgeDestLabels() { + return true; + } + + static unsigned numEdgeDestLabels(const void *Node) { + return ((const SDNode *) Node)->getNumValues(); + } + + static std::string getEdgeDestLabel(const void *Node, unsigned i) { + return ((const SDNode *) Node)->getValueType(i).getEVTString(); + } + + template<typename EdgeIter> + static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { + return itostr(I - SDNodeIterator::begin((const SDNode *) Node)); + } + + /// edgeTargetsEdgeSource - This method returns true if this outgoing edge + /// should actually target another edge source, not a node. If this method + /// is implemented, getEdgeTarget should be implemented. + template<typename EdgeIter> + static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) { + return true; + } + + /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is + /// called to determine which outgoing edge of Node is the target of this + /// edge. 
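+    /// (A sketch of the mapping implemented below: operand I of this node is
+    /// an SDValue whose getResNo() names a result of the target node, so the
+    /// edge is redirected to begin(TargetNode) advanced by that result
+    /// number, lining up with the per-result labels of getEdgeDestLabel.)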
+ template<typename EdgeIter> + static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) { + SDNode *TargetNode = *I; + SDNodeIterator NI = SDNodeIterator::begin(TargetNode); + std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo()); + return NI; + } + + static std::string getGraphName(const SelectionDAG *G) { + return G->getMachineFunction().getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool hasNodeAddressLabel(const SDNode *Node, + const SelectionDAG *Graph) { + return true; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + template<typename EdgeIter> + static std::string getEdgeAttributes(const void *Node, EdgeIter EI, + const SelectionDAG *Graph) { + SDValue Op = EI.getNode()->getOperand(EI.getOperand()); + EVT VT = Op.getValueType(); + if (VT == MVT::Glue) + return "color=red,style=bold"; + else if (VT == MVT::Other) + return "color=blue,style=dashed"; + return ""; + } + + + static std::string getSimpleNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + std::string Result = Node->getOperationName(G); + { + raw_string_ostream OS(Result); + Node->print_details(OS, G); + } + return Result; + } + std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph); + static std::string getNodeAttributes(const SDNode *N, + const SelectionDAG *Graph) { +#ifndef NDEBUG + const std::string &Attrs = Graph->getGraphAttrs(N); + if (!Attrs.empty()) { + if (Attrs.find("shape=") == std::string::npos) + return std::string("shape=Mrecord,") + Attrs; + else + return Attrs; + } +#endif + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(SelectionDAG *G, + GraphWriter<SelectionDAG*> &GW) { + GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + if (G->getRoot().getNode()) + GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(), + "color=blue,style=dashed"); + } + }; +} + +std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G); +} + + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void SelectionDAG::viewGraph(const std::string &Title) { +// This code is only for debugging! +#ifndef NDEBUG + ViewGraph(this, "dag." + getMachineFunction().getName(), + false, Title); +#else + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +// This overload is defined out-of-line here instead of just using a +// default parameter because this is easiest for gdb to call. +void SelectionDAG::viewGraph() { + viewGraph(""); +} + +/// clearGraphAttrs - Clear all previously defined node graph attributes. +/// Intended to be used from a debugging tool (eg. gdb). +void SelectionDAG::clearGraphAttrs() { +#ifndef NDEBUG + NodeGraphAttrs.clear(); +#else + errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".) +/// +void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { +#ifndef NDEBUG + NodeGraphAttrs[N] = Attrs; +#else + errs() << "SelectionDAG::setGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".) 
+/// Used from getNodeAttributes. +const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { +#ifndef NDEBUG + std::map<const SDNode *, std::string>::const_iterator I = + NodeGraphAttrs.find(N); + + if (I != NodeGraphAttrs.end()) + return I->second; + else + return ""; +#else + errs() << "SelectionDAG::getGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; + return std::string(); +#endif +} + +/// setGraphColor - Convenience for setting node color attribute. +/// +void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { +#ifndef NDEBUG + NodeGraphAttrs[N] = std::string("color=") + Color; +#else + errs() << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +/// setSubgraphColorHelper - Implement setSubgraphColor. Return +/// whether we truncated the search. +/// +bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited, + int level, bool &printed) { + bool hit_limit = false; + +#ifndef NDEBUG + if (level >= 20) { + if (!printed) { + printed = true; + DEBUG(dbgs() << "setSubgraphColor hit max level\n"); + } + return true; + } + + unsigned oldSize = visited.size(); + visited.insert(N); + if (visited.size() != oldSize) { + setGraphColor(N, Color); + for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N); + i != iend; + ++i) { + hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit; + } + } +#else + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif + return hit_limit; +} + +/// setSubgraphColor - Convenience for setting subgraph color attribute. +/// +void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { +#ifndef NDEBUG + DenseSet<SDNode *> visited; + bool printed = false; + if (setSubgraphColorHelper(N, Color, visited, 0, printed)) { + // Visually mark that we hit the limit + if (strcmp(Color, "red") == 0) { + setSubgraphColorHelper(N, "blue", visited, 0, printed); + } else if (strcmp(Color, "yellow") == 0) { + setSubgraphColorHelper(N, "green", visited, 0, printed); + } + } + +#else + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream O(s); + O << "SU(" << SU->NodeNum << "): "; + if (SU->getNode()) { + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + O << DOTGraphTraits<SelectionDAG*> + ::getSimpleNodeLabel(GluedNodes.back(), DAG); + GluedNodes.pop_back(); + if (!GluedNodes.empty()) + O << "\n "; + } + } else { + O << "CROSS RC COPY"; + } + return O.str(); +} + +void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const { + if (DAG) { + // Draw a special "GraphRoot" node to indicate the root of the graph. 
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + const SDNode *N = DAG->getRoot().getNode(); + if (N && N->getNodeId() != -1) + GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1, + "color=blue,style=dashed"); + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp new file mode 100644 index 0000000..f5fc66c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -0,0 +1,2593 @@ +//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cctype> +using namespace llvm; + +/// NOTE: The constructor takes ownership of TLOF. +TargetLowering::TargetLowering(const TargetMachine &tm, + const TargetLoweringObjectFile *tlof) + : TargetLoweringBase(tm, tlof) {} + +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return NULL; +} + +/// Check whether a given call node is in tail position within its function. If +/// so, it sets Chain to the input chain of the tail call. +bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + SDValue &Chain) const { + const Function *F = DAG.getMachineFunction().getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + AttributeSet CallerAttrs = F->getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex) + .removeAttribute(Attribute::NoAlias).hasAttributes()) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + return false; + + // Check if the only use is a function return node. + return isUsedByReturnOnly(Node, Chain); +} + + +/// Generate a libcall taking the given operands as arguments and returning a +/// result of type RetVT. 
+SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, + RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) const { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); + + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI); + + return CallInfo.first; +} + + +/// SoftenSetCCOperands - Soften the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, + SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) const { + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) + && "Unsupported setcc type!"); + + // Expand into one or more soft-fp libcall(s). + RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + switch (CCCode) { + case ISD::SETEQ: + case ISD::SETOEQ: + LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : + (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + break; + case ISD::SETLE: + case ISD::SETOLE: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + break; + case ISD::SETGT: + case ISD::SETOGT: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUO: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : + (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + break; + case ISD::SETO: + LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : + (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; + break; + default: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : + (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + switch (CCCode) { + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + // Fallthrough + case ISD::SETUGT: + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUGE: + LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + break; + case ISD::SETULT: + LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + break; + case ISD::SETULE: + LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + break; + case ISD::SETUEQ: + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? 
RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+      break;
+    default: llvm_unreachable("Do not know how to soften this setcc!");
+    }
+  }
+
+  // Use the target-specific return value for comparison lib calls.
+  EVT RetVT = getCmpLibcallReturnType();
+  SDValue Ops[2] = { NewLHS, NewRHS };
+  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+  NewRHS = DAG.getConstant(0, RetVT);
+  CCCode = getCmpLibcallCC(LC1);
+  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+    SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT),
+                              NewLHS, NewRHS, DAG.getCondCode(CCCode));
+    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+    NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS,
+                         NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+    NewRHS = SDValue();
+  }
+}
+
+/// getJumpTableEncoding - Return the entry encoding for a jump table in the
+/// current function. The returned value is a member of the
+/// MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+  // In non-pic modes, just use the address of a block.
+  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+    return MachineJumpTableInfo::EK_BlockAddress;
+
+  // In PIC mode, if the target supports a GPRel32 directive, use it.
+  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
+    return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+
+  // Otherwise, use a label difference.
+  return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+                                                 SelectionDAG &DAG) const {
+  // If our PIC model is GP relative, use the global offset table as the base.
+  unsigned JTEncoding = getJumpTableEncoding();
+
+  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
+    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0));
+
+  return Table;
+}
+
+/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+/// MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+                                             unsigned JTI,
+                                             MCContext &Ctx) const {
+  // The normal PIC reloc base is the label at the start of the jump table.
+  return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // Assume that everything is safe in static mode.
+  if (getTargetMachine().getRelocationModel() == Reloc::Static)
+    return true;
+
+  // In dynamic-no-pic mode, assume that known defined values are safe.
+  if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
+      GA &&
+      !GA->getGlobal()->isDeclaration() &&
+      !GA->getGlobal()->isWeakForLinker())
+    return true;
+
+  // Otherwise assume nothing is safe.
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+//  Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// specified instruction is a constant integer. If so, check to see if there
+/// are any bits set in the constant that are not demanded. If so, shrink the
+/// constant and return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, + const APInt &Demanded) { + DebugLoc dl = Op.getDebugLoc(); + + // FIXME: ISD::SELECT, ISD::SELECT_CC + switch (Op.getOpcode()) { + default: break; + case ISD::XOR: + case ISD::AND: + case ISD::OR: { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!C) return false; + + if (Op.getOpcode() == ISD::XOR && + (C->getAPIntValue() | (~Demanded)).isAllOnesValue()) + return false; + + // if we can expand it to have all bits set, do it + if (C->getAPIntValue().intersects(~Demanded)) { + EVT VT = Op.getValueType(); + SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), + DAG.getConstant(Demanded & + C->getAPIntValue(), + VT)); + return CombineTo(Op, New); + } + + break; + } + } + + return false; +} + +/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the +/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening +/// cast, but it could be generalized for targets with other types of +/// implicit widening casts. +bool +TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, + unsigned BitWidth, + const APInt &Demanded, + DebugLoc dl) { + assert(Op.getNumOperands() == 2 && + "ShrinkDemandedOp only supports binary operators!"); + assert(Op.getNode()->getNumValues() == 1 && + "ShrinkDemandedOp only supports nodes with one result!"); + + // Don't do this if the node has another user, which may require the + // full value. + if (!Op.getNode()->hasOneUse()) + return false; + + // Search for the smallest integer type with free casts to and from + // Op's type. For expedience, just check power-of-2 integer types. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros(); + unsigned SmallVTBits = DemandedSize; + if (!isPowerOf2_32(SmallVTBits)) + SmallVTBits = NextPowerOf2(SmallVTBits); + for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits); + if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && + TLI.isZExtFree(SmallVT, Op.getValueType())) { + // We found a type with free casts. + SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT, + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(1))); + bool NeedZext = DemandedSize > SmallVTBits; + SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, + dl, Op.getValueType(), X); + return CombineTo(Op, Z); + } + } + return false; +} + +/// SimplifyDemandedBits - Look at Op. At this point, we know that only the +/// DemandedMask bits of the result of Op are ever used downstream. If we can +/// use this information to simplify Op, create a new simplified DAG node and +/// return true, returning the original and new nodes in Old and New. Otherwise, +/// analyze the expression and return a mask of KnownOne and KnownZero bits for +/// the expression (used to simplify the caller). The KnownZero/One bits may +/// only be accurate for those bits in the DemandedMask. 
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+                                          const APInt &DemandedMask,
+                                          APInt &KnownZero,
+                                          APInt &KnownOne,
+                                          TargetLoweringOpt &TLO,
+                                          unsigned Depth) const {
+  unsigned BitWidth = DemandedMask.getBitWidth();
+  assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
+         "Mask size mismatches value type size!");
+  APInt NewMask = DemandedMask;
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Don't know anything.
+  KnownZero = KnownOne = APInt(BitWidth, 0);
+
+  // Other users may use these bits.
+  if (!Op.getNode()->hasOneUse()) {
+    if (Depth != 0) {
+      // If not at the root, just compute the KnownZero/KnownOne bits to
+      // simplify things downstream.
+      TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+      return false;
+    }
+    // If this is the root being simplified, allow it to have multiple uses,
+    // just set the NewMask to all bits.
+    NewMask = APInt::getAllOnesValue(BitWidth);
+  } else if (DemandedMask == 0) {
+    // Not demanding any bits from Op.
+    if (Op.getOpcode() != ISD::UNDEF)
+      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+    return false;
+  } else if (Depth == 6) {        // Limit search depth.
+    return false;
+  }
+
+  APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+    KnownZero = ~KnownOne;
+    return false;   // Don't fall through, will infinitely loop.
+  case ISD::AND:
+    // If the RHS is a constant, check to see if the LHS would be zero without
+    // using the bits from the RHS. Below, we use knowledge about the RHS to
+    // simplify the LHS; here we're using information from the LHS to simplify
+    // the RHS.
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      APInt LHSZero, LHSOne;
+      // Do not increment Depth here; that can cause an infinite loop.
+      TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
+      // If the LHS already has zeros where RHSC does, this 'and' is dead.
+      if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+        return TLO.CombineTo(Op, Op.getOperand(0));
+      // If any of the set bits in the RHS are known zero on the LHS, shrink
+      // the constant.
+      if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+        return true;
+    }
+
+    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+                             KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+                             KnownZero2, KnownOne2, TLO, Depth+1))
+      return true;
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If all of the demanded bits are known one on one side, return the other.
+    // These bits cannot contribute to the result of the 'and'.
+    if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+      return TLO.CombineTo(Op, Op.getOperand(0));
+    if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    // If all of the demanded bits in the inputs are known zeros, return zero.
+    if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
+    // If the RHS is a constant, see if we can simplify it.
+    if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+      return true;
+    // If the operation can be done in a smaller type, do so.
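+    // For example (a sketch, assuming i32<->i8 truncation and extension are
+    // free on the target): if only the low 8 bits of a 32-bit 'and' are
+    // demanded, it is rewritten as an i8 'and' of truncated operands,
+    // extended back to i32 (any-extend here, since no higher bits are used):
+    //   (i32 and X, Y) --> (anyext (i8 and (trunc X), (trunc Y)))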
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + break; + case ISD::OR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + break; + case ISD::XOR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. + if ((KnownZero & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((KnownZero2 & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // If all of the unknown bits are known to be zero on one side or the other + // (but not both) turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1))); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + // NB: it is okay if more bits are known than are requested + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side + if (KnownOne == KnownOne2) { // set bits are the same on both sides + EVT VT = Op.getValueType(); + SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, + Op.getOperand(0), ANDC)); + } + } + + // If the RHS is a constant, see if we can simplify it. + // for XOR, we prefer to force bits to 1 if they will make a -1. + // if we can't force bits, try to shrink constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt Expanded = C->getAPIntValue() | (~NewMask); + // if we can expand it to have all bits set, do it + if (Expanded.isAllOnesValue()) { + if (Expanded != C->getAPIntValue()) { + EVT VT = Op.getValueType(); + SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), + TLO.DAG.getConstant(Expanded, VT)); + return TLO.CombineTo(Op, New); + } + // if it already has all the bits set, nothing to change + // but don't shrink either! + } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + return true; + } + } + + KnownZero = KnownZeroOut; + KnownOne = KnownOneOut; + break; + case ISD::SELECT: + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SELECT_CC: + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SHL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. 
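+      // Worked example: with C1 == 4 and ShAmt == 2, Diff == -2, so
+      // ((X >>u 4) << 2) becomes (X >>u 2); the two expressions differ only
+      // in the two low bits, which NewMask says are never used.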
+ if (InOp.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + + // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits + // are not demanded. This will likely allow the anyext to be folded away. + if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerOp = InOp.getNode()->getOperand(0); + EVT InnerVT = InnerOp.getValueType(); + unsigned InnerBits = InnerVT.getSizeInBits(); + if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && + isTypeDesirableForOp(ISD::SHL, InnerVT)) { + EVT ShTy = getShiftAmountTy(InnerVT); + if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) + ShTy = InnerVT; + SDValue NarrowShl = + TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, + TLO.DAG.getConstant(ShAmt, ShTy)); + return + TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), + NarrowShl)); + } + } + + KnownZero <<= SA->getZExtValue(); + KnownOne <<= SA->getZExtValue(); + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue()); + } + break; + case ISD::SRL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + EVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + unsigned VTSize = VT.getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted out) + // are never demanded. + if (InOp.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + // Compute the new bits that are at the top now. + if (SimplifyDemandedBits(InOp, (NewMask << ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + KnownZero |= HighBits; // High bits known zero. + } + break; + case ISD::SRA: + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. 
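+    // e.g. with only bit 0 demanded, (sra X, n) and (srl X, n) both produce
+    // X[n] in that position; the sign bit could only reach bit 0 when
+    // n >= BitWidth, which is already undefined.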
+ if (NewMask == 1) + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); + + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + EVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + APInt InDemandedMask = (NewMask << ShAmt); + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + if (HighBits.intersects(NewMask)) + InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bit, adjusted to where it is now in the mask. + APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), + Op.getOperand(1))); + } else if (KnownOne.intersects(SignBit)) { // New bits are known one. + KnownOne |= HighBits; + } + } + break; + case ISD::SIGN_EXTEND_INREG: { + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); + // If we only care about the highest bit, don't bother shifting right. + if (MsbMask == DemandedMask) { + unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, ShiftAmt)); + } + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, + BitWidth - ExVT.getScalarType().getSizeInBits()); + + // If none of the extended bits are demanded, eliminate the sextinreg. + if ((NewBits & NewMask) == 0) + return TLO.CombineTo(Op, Op.getOperand(0)); + + APInt InSignBit = + APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt InputDemandedBits = + APInt::getLowBitsSet(BitWidth, + ExVT.getScalarType().getSizeInBits()) & + NewMask; + + // Since the sign extended bits are demanded, we know that the sign + // bit is demanded. + InputDemandedBits |= InSignBit; + + if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, convert this into a zero extension. 
+ if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); + + if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + break; + } + case ISD::ZERO_EXTEND: { + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt InMask = NewMask.trunc(OperandBitWidth); + + // If none of the top bits are demanded, convert this into an any_extend. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; + if (!NewBits.intersects(NewMask)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + KnownZero |= NewBits; + break; + } + case ISD::SIGN_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); + APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); + APInt NewBits = ~InMask & NewMask; + + // If none of the top bits are demanded, convert this into an any_extend. + if (NewBits == 0) + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // Since some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + APInt InDemandedBits = InMask & NewMask; + InDemandedBits |= InSignBit; + InDemandedBits = InDemandedBits.trunc(InBits); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + + // If the sign bit is known zero, convert this to a zero extend. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // If the sign bit is known one, the top bits match. + if (KnownOne.intersects(InSignBit)) { + KnownOne |= NewBits; + assert((KnownZero & NewBits) == 0); + } else { // Otherwise, top bits aren't known. + assert((KnownOne & NewBits) == 0); + assert((KnownZero & NewBits) == 0); + } + break; + } + case ISD::ANY_EXTEND: { + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt InMask = NewMask.trunc(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + break; + } + case ISD::TRUNCATE: { + // Simplify the input, using demanded bit information, and compute the known + // zero/one bits live out. 
+ unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt TruncMask = NewMask.zext(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + + // If the input is only used by this truncate, see if we can shrink it based + // on the known demanded bits. + if (Op.getOperand(0).getNode()->hasOneUse()) { + SDValue In = Op.getOperand(0); + switch (In.getOpcode()) { + default: break; + case ISD::SRL: + // Shrink SRL by a constant if none of the high bits shifted in are + // demanded. + if (TLO.LegalTypes() && + !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) + // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is + // undesirable. + break; + ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); + if (!ShAmt) + break; + SDValue Shift = In.getOperand(1); + if (TLO.LegalTypes()) { + uint64_t ShVal = ShAmt->getZExtValue(); + Shift = + TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType())); + } + + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); + + if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + Shift)); + } + break; + } + } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; + } + case ISD::AssertZext: { + // AssertZext demands all of the high bits, plus any of the low bits + // demanded by its users. + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + KnownZero |= ~InMask & NewMask; + break; + } + case ISD::BITCAST: + // If this is an FP->Int bitcast and if the sign bit is the only + // thing demanded, turn this into a FGETSIGN. + if (!TLO.LegalOperations() && + !Op.getValueType().isVector() && + !Op.getOperand(0).getValueType().isVector() && + NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && + Op.getOperand(0).getValueType().isFloatingPoint()) { + bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); + bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; + // Make a FGETSIGN + SHL to move the sign bit into the appropriate + // place. We expect the SHL to be eliminated by other optimizations. 
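+        // For example: when only the sign bit of (i32 bitcast X) is demanded
+        // for an f32 X, the bitcast is rewritten to (shl (fgetsign X), 31);
+        // fgetsign yields 0 or 1, so the shifted value agrees with the
+        // original on the one demanded bit.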
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); + unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits(); + if (!OpVTLegal && OpVTSizeInBits > 32) + Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); + unsigned ShVal = Op.getValueType().getSizeInBits()-1; + SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), + Sign, ShAmt)); + } + } + break; + case ISD::ADD: + case ISD::MUL: + case ISD::SUB: { + // Add, Sub, and Mul don't demand any bits in positions beyond that + // of the highest bit demanded of them. + APInt LoMask = APInt::getLowBitsSet(BitWidth, + BitWidth - NewMask.countLeadingZeros()); + if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + // See if the operation should be performed at a smaller bit width. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + } + // FALL THROUGH + default: + // Just use ComputeMaskedBits to compute output bits. + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + break; + } + + // If we know the value of all of the demanded bits, return this as a + // constant. + if ((NewMask & (KnownZero|KnownOne)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); + + return false; +} + +/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// in Mask are known to be either zero or one and return them in the +/// KnownZero/KnownOne bitsets. +void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); +} + +/// ComputeNumSignBitsForTargetNode - This method can be implemented by +/// targets that want to expose additional information about sign bits to the +/// DAG Combiner. +unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use ComputeNumSignBits if you don't know whether Op" + " is a target node!"); + return 1; +} + +/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly +/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to +/// determine which bit is set. +/// +static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { + // A left-shift of a constant one will have exactly one bit set, because + // shifting the bit off the end is undefined. + if (Val.getOpcode() == ISD::SHL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue() == 1) + return true; + + // Similarly, a right-shift of a constant sign-bit will have exactly + // one bit set. 
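+  // e.g. (i8 srl 0x80, n) is one of 0x80, 0x40, ..., 0x01; the single set
+  // bit cannot be shifted out because n >= 8 would be undefined.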
+ if (Val.getOpcode() == ISD::SRL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue().isSignBit()) + return true; + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to ComputeMaskedBits to catch other known cases. + EVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(Val, KnownZero, KnownOne); + return (KnownZero.countPopulation() == BitWidth - 1) && + (KnownOne.countPopulation() == 1); +} + +/// SimplifySetCC - Try to simplify a setcc built with the specified operands +/// and cc. If it is unable to simplify it, return a null SDValue. +SDValue +TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, bool foldBooleans, + DAGCombinerInfo &DCI, DebugLoc dl) const { + SelectionDAG &DAG = DCI.DAG; + + // These setcc operations always fold. + switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return DAG.getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return DAG.getConstant(1, VT); + } + + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. + if (isa<ConstantSDNode>(N0.getNode())) + return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. + if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + const APInt &ShAmt + = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; + } + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), + Zero, Cond); + } + } + + SDValue CTPOP = N0; + // Look through truncs that don't change the value of a ctpop. + if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) + CTPOP = N0.getOperand(0); + + if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && + (N0 == CTPOP || N0.getValueType().getSizeInBits() > + Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) { + EVT CTVT = CTPOP.getValueType(); + SDValue CTOp = CTPOP.getOperand(0); + + // (ctpop x) u< 2 -> (x & x-1) == 0 + // (ctpop x) u> 1 -> (x & x-1) != 0 + if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ + SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, + DAG.getConstant(1, CTVT)); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC); + } + + // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. 
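+      // The rewrite relies on the identity that x & (x - 1) clears the lowest
+      // set bit, so it is zero iff x has at most one bit set; e.g.
+      // x = 0b0100: 0b0100 & 0b0011 == 0, but x = 0b0110: 0b0110 & 0b0101 ==
+      // 0b0100 != 0.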
+ } + + // (zext x) == C --> x == (trunc C) + if (DCI.isBeforeLegalize() && N0->hasOneUse() && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + unsigned MinBits = N0.getValueSizeInBits(); + SDValue PreZExt; + if (N0->getOpcode() == ISD::ZERO_EXTEND) { + // ZExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreZExt = N0->getOperand(0); + } else if (N0->getOpcode() == ISD::AND) { + // DAGCombine turns costly ZExts into ANDs + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) + if ((C->getAPIntValue()+1).isPowerOf2()) { + MinBits = C->getAPIntValue().countTrailingOnes(); + PreZExt = N0->getOperand(0); + } + } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) { + // ZEXTLOAD + if (LN0->getExtensionType() == ISD::ZEXTLOAD) { + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreZExt = N0; + } + } + + // Make sure we're not losing bits from the constant. + if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) { + EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); + if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { + // Will get folded away. + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt); + SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); + return DAG.getSetCC(dl, VT, Trunc, C, Cond); + } + } + } + + // If the LHS is '(and load, const)', the RHS is 0, + // the test is for equality or unsigned, and all 1 bits of the const are + // in the same partial word, see if we can shorten the load. + if (DCI.isBeforeLegalize() && + N0.getOpcode() == ISD::AND && C1 == 0 && + N0.getNode()->hasOneUse() && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(0).getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); + APInt bestMask; + unsigned bestWidth = 0, bestOffset = 0; + if (!Lod->isVolatile() && Lod->isUnindexed()) { + unsigned origWidth = N0.getValueType().getSizeInBits(); + unsigned maskWidth = origWidth; + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // 8 bits, but have to be careful... + if (Lod->getExtensionType() != ISD::NON_EXTLOAD) + origWidth = Lod->getMemoryVT().getSizeInBits(); + const APInt &Mask = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + for (unsigned width = origWidth / 2; width>=8; width /= 2) { + APInt newMask = APInt::getLowBitsSet(maskWidth, width); + for (unsigned offset=0; offset<origWidth/width; offset++) { + if ((newMask & Mask) == Mask) { + if (!getDataLayout()->isLittleEndian()) + bestOffset = (origWidth/width - offset - 1) * (width/8); + else + bestOffset = (uint64_t)offset * (width/8); + bestMask = Mask.lshr(offset * (width/8) * 8); + bestWidth = width; + break; + } + newMask = newMask << width; + } + } + } + if (bestWidth) { + EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); + if (newVT.isRound()) { + EVT PtrType = Lod->getOperand(1).getValueType(); + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), + DAG.getConstant(bestOffset, PtrType)); + unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); + SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getPointerInfo().getWithOffset(bestOffset), + false, false, false, NewAlign); + return DAG.getSetCC(dl, VT, + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask.trunc(bestWidth), + newVT)), + DAG.getConstant(0LL, newVT), Cond); + } + } + } + + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. 
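+ // For instance, (zext i8 X to i32) can only produce values 0..255, so
+ // an equality compare against 300 folds to a constant below, while a
+ // compare against 200 can be narrowed to an i8 compare of X.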
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); + + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match. + if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), + C1.getBitWidth() - InSize))) { + switch (Cond) { + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETEQ: return DAG.getConstant(0, VT); + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant(C1.isNegative(), VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. + return DAG.getConstant(C1.isNonNegative(), VT); + default: + break; + } + } + + // Otherwise, we can perform the comparison with the low bits. + switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: { + EVT newVT = N0.getOperand(0).getValueType(); + if (DCI.isBeforeLegalizeOps() || + (isOperationLegal(ISD::SETCC, newVT) && + getCondCodeAction(Cond, newVT.getSimpleVT())==Legal)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(C1.trunc(InSize), newVT), + Cond); + break; + } + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); + EVT ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); + + // If the constant doesn't fit into the number of bits for the source of + // the sign extension, it is impossible for both sides to be equal. + if (C1.getMinSignedBits() > ExtSrcTyBits) + return DAG.getConstant(Cond == ISD::SETNE, VT); + + SDValue ZextOp; + EVT Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); + } else { + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, Op0Ty)); + } + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.getNode()); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(dl, VT, ZextOp, + DAG.getConstant(C1 & APInt::getLowBitsSet( + ExtDstTyBits, + ExtSrcTyBits), + ExtDstTy), + Cond); + } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC && + isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); + if (TrueWhenTrue) + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + // Invert the condition. + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + N0.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + } + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero. 
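+ // When the top bits are known zero, X is either 0 or 1, so (X^1) == 0
+ // holds exactly when X == 1, i.e. when X != 0; the xor can be dropped
+ // by inverting the condition.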
+ unsigned BitWidth = N0.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N0, + APInt::getHighBitsSet(BitWidth, + BitWidth-1))) { + // Okay, get the un-inverted input value. + SDValue Val; + if (N0.getOpcode() == ISD::XOR) + Val = N0.getOperand(0); + else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), + N0.getOperand(0).getOperand(0), + N0.getOperand(1)); + } + + return DAG.getSetCC(dl, VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } else if (N1C->getAPIntValue() == 1 && + (VT == MVT::i1 || + getBooleanContents(false) == ZeroOrOneBooleanContent)) { + SDValue Op0 = N0; + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + + if ((Op0.getOpcode() == ISD::XOR) && + Op0.getOperand(0).getOpcode() == ISD::SETCC && + Op0.getOperand(1).getOpcode() == ISD::SETCC) { + // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) + Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; + return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), + Cond); + } + if (Op0.getOpcode() == ISD::AND && + isa<ConstantSDNode>(Op0.getOperand(1)) && + cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. + if (Op0.getValueType().bitsGT(VT)) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, VT)); + else if (Op0.getValueType().bitsLT(VT)) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, VT)); + + return DAG.getSetCC(dl, VT, Op0, + DAG.getConstant(0, Op0.getValueType()), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + if (Op0.getOpcode() == ISD::AssertZext && + cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1) + return DAG.getSetCC(dl, VT, Op0, + DAG.getConstant(0, Op0.getValueType()), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } + + APInt MinVal, MaxVal; + unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + if (ISD::isSignedIntSetCC(Cond)) { + MinVal = APInt::getSignedMinValue(OperandBitSize); + MaxVal = APInt::getSignedMaxValue(OperandBitSize); + } else { + MinVal = APInt::getMinValue(OperandBitSize); + MaxVal = APInt::getMaxValue(OperandBitSize); + } + + // Canonicalize GE/LE comparisons to use GT/LT comparisons. + if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { + if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + // X >= C0 --> X > (C0-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1-1, N1.getValueType()), + (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + } + + if (Cond == ISD::SETLE || Cond == ISD::SETULE) { + if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + // X <= C0 --> X < (C0+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1+1, N1.getValueType()), + (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT); + } + + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) + return DAG.getConstant(0, VT); // X < MIN --> false + if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) + return DAG.getConstant(1, VT); // X >= MIN --> true + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) + return DAG.getConstant(0, VT); // X > MAX --> false + if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) + return DAG.getConstant(1, VT); // X <= MAX --> true + + // Canonicalize setgt X, Min --> setne X, Min + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + // Canonicalize setlt X, Max --> setne X, Max + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, N0.getValueType()), + ISD::SETEQ); + // If we have setugt X, Max-1, turn it into seteq X, Max + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, N0.getValueType()), + ISD::SETEQ); + + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. + + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } + + // Fold bit comparisons when we can. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + (VT == N0.getValueType() || + (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + N0.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + getPointerTy() : getShiftAmountTy(N0.getValueType()); + if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 + // Perform the xform if the AND RHS is a single bit. + if (AndRHS->getAPIntValue().isPowerOf2()) { + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, + DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy))); + } + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { + // (X & 8) == 8 --> (X & 8) >> 3 + // Perform the xform if C1 is a single bit. + if (C1.isPowerOf2()) { + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, + DAG.getConstant(C1.logBase2(), ShiftTy))); + } + } + } + + if (C1.getMinSignedBits() <= 64 && + !isLegalICmpImmediate(C1.getSExtValue())) { + // (X & -256) == 256 -> (X >> 8) == 1 + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::AND && N0.hasOneUse()) { + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + const APInt &AndRHSC = AndRHS->getAPIntValue(); + if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { + unsigned ShiftBits = AndRHSC.countTrailingZeros(); + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? 
+ getPointerTy() : getShiftAmountTy(N0.getValueType()); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), + DAG.getConstant(ShiftBits, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } + } + } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || + Cond == ISD::SETULE || Cond == ISD::SETUGT) { + bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT); + // X < 0x100000000 -> (X >> 32) < 1 + // X >= 0x100000000 -> (X >> 32) >= 1 + // X <= 0x0ffffffff -> (X >> 32) < 1 + // X > 0x0ffffffff -> (X >> 32) >= 1 + unsigned ShiftBits; + APInt NewC = C1; + ISD::CondCode NewCond = Cond; + if (AdjOne) { + ShiftBits = C1.countTrailingOnes(); + NewC = NewC + 1; + NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; + } else { + ShiftBits = C1.countTrailingZeros(); + } + NewC = NewC.lshr(ShiftBits); + if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + getPointerTy() : getShiftAmountTy(N0.getValueType()); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, + DAG.getConstant(ShiftBits, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(NewC, CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); + } + } + } + } + + if (isa<ConstantFPSDNode>(N0.getNode())) { + // Constant fold or commute setcc. + SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); + if (O.getNode()) return O; + } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + // If the RHS of an FP comparison is a constant, simplify it away in + // some cases. + if (CFP->getValueAPF().isNaN()) { + // If an operand is known to be a nan, we can fold it. + switch (ISD::getUnorderedFlavor(Cond)) { + default: llvm_unreachable("Unknown flavor!"); + case 0: // Known false. + return DAG.getConstant(0, VT); + case 1: // Known true. + return DAG.getConstant(1, VT); + case 2: // Undefined. + return DAG.getUNDEF(VT); + } + } + + // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the + // constant if knowing that the operand is non-nan is enough. We prefer to + // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to + // materialize 0.0. + if (Cond == ISD::SETO || Cond == ISD::SETUO) + return DAG.getSetCC(dl, VT, N0, N0, Cond); + + // If the condition is not legal, see if we can find an equivalent one + // which is legal. + if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) { + // If the comparison was an awkward floating-point == or != and one of + // the comparison operands is infinity or negative infinity, convert the + // condition to a less-awkward <= or >=. 
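+ // Nothing compares below -inf or above +inf, so for any non-NaN x,
+ // x <= -inf holds iff x == -inf, and x >= +inf holds iff x == +inf;
+ // the unordered variants map over in the same way.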
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETULE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETOGT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+ } else {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETUGE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETOLT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ }
+ }
+ }
+ }
+
+ if (N0 == N1) {
+ // The sext(setcc()) => setcc() optimization relies on the appropriate
+ // constant being emitted.
+ uint64_t EqVal = 0;
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond);
+ break;
+ case ZeroOrNegativeOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+ break;
+ }
+
+ // We can always fold X == X for integer setcc's.
+ if (N0.getValueType().isInteger()) {
+ return DAG.getConstant(EqVal, VT);
+ }
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getConstant(EqVal, VT);
+ if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+ return DAG.getConstant(EqVal, VT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
+ getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal))
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly. 
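+ // E.g. rewriting (X+4) == C into X == C-4 while X+4 has other uses
+ // keeps both X and X+4 live at once; the transforms below are
+ // therefore gated on hasOneUse() whenever the immediate could simply
+ // fold into the compare.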
+ bool LegalRHSImm = false; + + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { + if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + // Turn (X+C1) == C2 --> X == C2-C1 + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(RHSC->getAPIntValue()- + LHSR->getAPIntValue(), + N0.getValueType()), Cond); + } + + // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. + if (N0.getOpcode() == ISD::XOR) + // If we know that all of the inverted bits are zero, don't bother + // performing the inversion. + if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) + return + DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(LHSR->getAPIntValue() ^ + RHSC->getAPIntValue(), + N0.getValueType()), + Cond); + } + + // Turn (C1-X) == C2 --> X == C1-C2 + if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { + if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { + return + DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(SUBC->getAPIntValue() - + RHSC->getAPIntValue(), + N0.getValueType()), + Cond); + } + } + + // Could RHSC fold directly into a compare? + if (RHSC->getValueType(0).getSizeInBits() <= 64) + LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); + } + + // Simplify (X+Z) == X --> Z == 0 + // Don't do this if X is an immediate that can fold into a cmp + // instruction and X+Z has other uses. It could be an induction variable + // chain, and the transform would increase register pressure. + if (!LegalRHSImm || N0.getNode()->hasOneUse()) { + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, + DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } + } + } + } + + if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || + N1.getOpcode() == ISD::XOR) { + // Simplify X == (X+Z) --> Z == 0 + if (N1.getOperand(0) == N0) + return DAG.getSetCC(dl, VT, N1.getOperand(1), + DAG.getConstant(0, N1.getValueType()), Cond); + if (N1.getOperand(1) == N0) { + if (DAG.isCommutativeBinOp(N1.getOpcode())) + return DAG.getSetCC(dl, VT, N1.getOperand(0), + DAG.getConstant(0, N1.getValueType()), Cond); + if (N1.getNode()->hasOneUse()) { + assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); + // X == (Z-X) --> X<<1 == Z + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, + DAG.getConstant(1, getShiftAmountTy(N0.getValueType()))); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); + } + } + } + + // Simplify x&y == y to x&y != 0 if y has exactly one bit set. + // Note that where y is variable and is known to have at most + // one bit set (for example, if it is z&1) we cannot do this; + // the expressions are not equivalent when y==0. 
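+ // E.g. with y = 8, (x&8) == 8 iff bit 3 of x is set iff (x&8) != 0;
+ // but if y = z&1 is 0 at runtime, x&y == y is true while x&y != 0 is
+ // false, so the rewrite below requires y to have exactly one bit set.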
+ if (N0.getOpcode() == ISD::AND) + if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { + if (ValueHasExactlyOneBitSet(N1, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + SDValue Zero = DAG.getConstant(0, N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, Zero, Cond); + } + } + if (N1.getOpcode() == ISD::AND) + if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { + if (ValueHasExactlyOneBitSet(N0, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N1, Zero, Cond); + } + } + } + + // Fold away ALL boolean setcc's. + SDValue Temp; + if (N0.getValueType() == MVT::i1 && foldBooleans) { + switch (Cond) { + default: llvm_unreachable("Unknown integer setcc!"); + case ISD::SETEQ: // X == Y -> ~(X^Y) + Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + N0 = DAG.getNOT(dl, Temp, MVT::i1); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETNE: // X != Y --> (X^Y) + N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + break; + case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y + case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X + case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y + case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X + case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); + break; + } + if (VT != MVT::i1) { + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(N0.getNode()); + // FIXME: If running after legalize, we probably can't do this. + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); + } + return N0; + } + + // Could not fold it. + return SDValue(); +} + +/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the +/// node is a GlobalAddress + offset. +bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, + int64_t &Offset) const { + if (isa<GlobalAddressSDNode>(N)) { + GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); + GA = GASD->getGlobal(); + Offset += GASD->getOffset(); + return true; + } + + if (N->getOpcode() == ISD::ADD) { + SDValue N1 = N->getOperand(0); + SDValue N2 = N->getOperand(1); + if (isGAPlusOffset(N1.getNode(), GA, Offset)) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); + if (V) { + Offset += V->getSExtValue(); + return true; + } + } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); + if (V) { + Offset += V->getSExtValue(); + return true; + } + } + } + + return false; +} + + +SDValue TargetLowering:: +PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { + // Default implementation: no optimization. 
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+ unsigned S = Constraint.size();
+
+ if (S == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsettable
+ case 'V': // not offsettable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
+ case 's': // Relocatable Constant
+ case 'p': // Address.
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case '<':
+ case '>':
+ return C_Other;
+ }
+ }
+
+ if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
+ if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}"
+ return C_Memory;
+ return C_Register;
+ }
+ return C_Unknown;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return 0;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock) {
+ Ops.push_back(Op);
+ return;
+ }
+ // fall through
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C are missing.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (C == 0 || GA == 0) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (C == 0 || GA == 0)
+ C = 0, GA = 0;
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getZExtValue();
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ C ? C->getDebugLoc() : DebugLoc(),
+ Op.getValueType(), Offs));
+ return;
+ }
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's') {
+ // gcc prints these as sign extended. 
Sign extend value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ MVT::i64));
+ return;
+ }
+ }
+ break;
+ }
+ }
+}
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint[0] != '{')
+ return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+
+ // Figure out which register class contains this reg.
+ const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo();
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ if (!isLegalRC(RC))
+ continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (RegName.equals_lower(RI->getName(*I))) {
+ std::pair<unsigned, const TargetRegisterClass*> S =
+ std::make_pair(*I, RC);
+
+ // If this register class has the requested value type, return it,
+ // otherwise keep searching and return the first class found
+ // if no other is found which explicitly has the requested type.
+ if (RC->hasType(VT))
+ return S;
+ else if (!R.second)
+ R = S;
+ }
+ }
+ }
+
+ return R;
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+ ImmutableCallSite CS) const {
+ /// ConstraintOperands - Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ InlineAsm::ConstraintInfoVector
+ ConstraintInfos = IA->ParseConstraints();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output. 
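+ // As an illustrative example, a constraint string of "=r,r,0" parses
+ // into one register output (consuming result 0) and two inputs, the
+ // second being a matching constraint tied to output operand 0.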
+ + for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i])); + AsmOperandInfo &OpInfo = ConstraintOperands.back(); + + // Update multiple alternative constraint count. + if (OpInfo.multipleAlternatives.size() > maCount) + maCount = OpInfo.multipleAlternatives.size(); + + OpInfo.ConstraintVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(!CS.getType()->isVoidTy() && + "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpInfo.ConstraintVT = getSimpleValueType(STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpInfo.ConstraintVT = getSimpleValueType(CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + if (OpInfo.CallOperandVal) { + llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); + if (OpInfo.isIndirect) { + llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } + + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. + if (StructType *STy = dyn_cast<StructType>(OpTy)) + if (STy->getNumElements() == 1) + OpTy = STy->getElementType(0); + + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. + if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpInfo.ConstraintVT = + MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true); + break; + } + } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { + OpInfo.ConstraintVT = MVT::getIntegerVT( + 8*getDataLayout()->getPointerSize(PT->getAddressSpace())); + } else { + OpInfo.ConstraintVT = MVT::getVT(OpTy, true); + } + } + } + + // If we have multiple alternative constraints, select the best alternative. + if (ConstraintInfos.size()) { + if (maCount) { + unsigned bestMAIndex = 0; + int bestWeight = -1; + // weight: -1 = invalid match, and 0 = so-so match to 5 = good match. + int weight = -1; + unsigned maIndex; + // Compute the sums of the weights for each alternative, keeping track + // of the best (highest weight) one so far. + for (maIndex = 0; maIndex < maCount; ++maIndex) { + int weightSum = 0; + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + if (OpInfo.Type == InlineAsm::isClobber) + continue; + + // If this is an output operand with a matching input operand, + // look up the matching input. If their types mismatch, e.g. one + // is an integer, the other is floating point, or their sizes are + // different, flag it as an maCantMatch. 
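+ // A tied pair such as an i32 output matched with an f32 or i64 input
+ // can never share a register, so the entire alternative is rejected
+ // by forcing its weight sum to -1.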
+ if (OpInfo.hasMatchingInput()) { + AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (OpInfo.ConstraintVT.getSizeInBits() != + Input.ConstraintVT.getSizeInBits())) { + weightSum = -1; // Can't match. + break; + } + } + } + weight = getMultipleConstraintMatchWeight(OpInfo, maIndex); + if (weight == -1) { + weightSum = -1; + break; + } + weightSum += weight; + } + // Update best. + if (weightSum > bestWeight) { + bestWeight = weightSum; + bestMAIndex = maIndex; + } + } + + // Now select chosen alternative in each constraint. + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& cInfo = ConstraintOperands[cIndex]; + if (cInfo.Type == InlineAsm::isClobber) + continue; + cInfo.selectAlternative(bestMAIndex); + } + } + } + + // Check and hook up tied operands, choose constraint code to use. + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + + // If this is an output operand with a matching input operand, look up the + // matching input. If their types mismatch, e.g. one is an integer, the + // other is floating point, or their sizes are different, flag it as an + // error. + if (OpInfo.hasMatchingInput()) { + AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (MatchRC.second != InputRC.second)) { + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + } + + } + } + + return ConstraintOperands; +} + + +/// getConstraintGenerality - Return an integer indicating how general CT +/// is. +static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { + switch (CT) { + case TargetLowering::C_Other: + case TargetLowering::C_Unknown: + return 0; + case TargetLowering::C_Register: + return 1; + case TargetLowering::C_RegisterClass: + return 2; + case TargetLowering::C_Memory: + return 3; + } + llvm_unreachable("Invalid constraint type"); +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + TargetLowering::getMultipleConstraintMatchWeight( + AsmOperandInfo &info, int maIndex) const { + InlineAsm::ConstraintCodeVector *rCodes; + if (maIndex >= (int)info.multipleAlternatives.size()) + rCodes = &info.Codes; + else + rCodes = &info.multipleAlternatives[maIndex].Codes; + ConstraintWeight BestWeight = CW_Invalid; + + // Loop over the options, keeping track of the most general one. + for (unsigned i = 0, e = rCodes->size(); i != e; ++i) { + ConstraintWeight weight = + getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str()); + if (weight > BestWeight) + BestWeight = weight; + } + + return BestWeight; +} + +/// Examine constraint type and operand type and determine a weight value. 
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 's': // non-explicit integral immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float if host format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, choosing the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory. Because of
+/// this the heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // If this is an 'other' constraint, see if the operand is valid for it.
+ // For example, on X86 we might have an 'rI' constraint. If the operand
+ // is an integer in the range [0..31] we want to use I (saving a load
+ // of a register), otherwise we must use 'r'. 
+ if (CType == TargetLowering::C_Other && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
+ // This constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *v = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
+ OpInfo.CallOperandVal = v;
+ return;
+ }
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+/// BuildExactSDIV - Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+ SelectionDAG &DAG) const {
+ ConstantSDNode *C = cast<ConstantSDNode>(Op2);
+ APInt d = C->getAPIntValue();
+ assert(d != 0 && "Division by zero!");
+
+ // Shift the value upfront if it is even, so the LSB is one.
+ unsigned ShAmt = d.countTrailingZeros();
+ if (ShAmt) {
+ // TODO: For UDIV use SRL instead of SRA.
+ SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+ d = d.ashr(ShAmt);
+ }
+
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t, xn = d;
+ while ((t = d*xn) != 1)
+ xn *= APInt(d.getBitWidth(), 2) - t;
+
+ Op2 = DAG.getConstant(xn, Op1.getValueType());
+ return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. 
See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::
+BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*> *Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::ms magics = d.magic();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
+ isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, VT));
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhs or equivalent
+ // If d > 0 and m < 0, add the numerator
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+ getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number. See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::
+BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*> *Created) const {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ APInt::mu magics = N1C.magicu();
+
+ SDValue Q = N->getOperand(0);
+
+ // If the divisor is even, we can avoid using the expensive fixup by shifting
+ // the divided value upfront.
+ if (magics.a != 0 && !N1C[0]) {
+ unsigned Shift = N1C.countTrailingZeros();
+ Q = DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ // Get magic number for the shifted divisor. 
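+ // E.g. (illustrative) for a divisor of 14 the dividend is shifted
+ // right by 1 and the magic number is recomputed for 7 with one
+ // dividend bit known zero, which is what lets magicu drop the
+ // add fixup.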
+ magics = N1C.lshr(Shift).magicu(Shift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
+ isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
+ else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
+ DAG.getConstant(magics.m, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhu or equivalent
+ if (Created)
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < N1C.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ if (Created)
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
new file mode 100644
index 0000000..f769b44
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+ : TD(TM.getDataLayout()) {
+}
+
+TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 0000000..10f64c7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,452 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots. 
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with
+// ShadowStackGC.
+//
+// In order to support this particular transformation, all stack roots are
+// co-allocated on the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CallSite.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ShadowStackGC : public GCStrategy {
+ /// RootChain - This is the global linked-list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ StructType *StackEntryTy;
+ StructType *FrameMapTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+ public:
+ ShadowStackGC();
+
+ bool initializeCustomLowering(Module &M);
+ bool performCustomLowering(Function &F);
+
+ private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ Type* GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Value *BasePtr,
+ int Idx1, int Idx2, const char *Name);
+ };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+ /// EscapeEnumerator - This is a little algorithm to find all escape points
+ /// from a function so that "finally"-style code can be inserted. In addition
+ /// to finding the existing return and unwind instructions, it also (if
+ /// necessary) transforms any call instructions into invokes and sends them to
+ /// a landing pad.
+ ///
+ /// It's wrapped up in a state machine using the same transform C# uses for
+ /// 'yield return' enumerators. This transform allows it to be non-allocating.
+ class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+ public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return 0;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return', 'resume', and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = StateBB++;
+
+ // Branches and invokes do not escape, only unwind, resume, and return
+ // do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI->getParent(), TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions. 
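+ // Each non-intrinsic call collected here is rewritten below into an
+ // invoke unwinding to a shared cleanup landing pad, so the enumerator
+ // also visits exceptional exits from the function.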
+ SmallVector<Instruction*,16> Calls; + for (Function::iterator BB = F.begin(), + E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), + EE = BB->end(); II != EE; ++II) + if (CallInst *CI = dyn_cast<CallInst>(II)) + if (!CI->getCalledFunction() || + !CI->getCalledFunction()->getIntrinsicID()) + Calls.push_back(CI); + + if (Calls.empty()) + return 0; + + // Create a cleanup block. + LLVMContext &C = F.getContext(); + BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); + Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), + Type::getInt32Ty(C), NULL); + Constant *PersFn = + F.getParent()-> + getOrInsertFunction("__gcc_personality_v0", + FunctionType::get(Type::getInt32Ty(C), true)); + LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1, + "cleanup.lpad", + CleanupBB); + LPad->setCleanup(true); + ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); + + // Transform the 'call' instructions into 'invoke's branching to the + // cleanup block. Go in reverse order to make prettier BB names. + SmallVector<Value*,16> Args; + for (unsigned I = Calls.size(); I != 0; ) { + CallInst *CI = cast<CallInst>(Calls[--I]); + + // Split the basic block containing the function call. + BasicBlock *CallBB = CI->getParent(); + BasicBlock *NewBB = + CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); + + // Remove the unconditional branch inserted at the end of CallBB. + CallBB->getInstList().pop_back(); + NewBB->getInstList().remove(CI); + + // Create a new invoke instruction. + Args.clear(); + CallSite CS(CI); + Args.append(CS.arg_begin(), CS.arg_end()); + + InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), + NewBB, CleanupBB, + Args, CI->getName(), CallBB); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + CI->replaceAllUsesWith(II); + delete CI; + } + + Builder.SetInsertPoint(RI->getParent(), RI); + return &Builder; + } + } + }; +} + +// ----------------------------------------------------------------------------- + +void llvm::linkShadowStackGC() { } + +ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) { + InitRoots = true; + CustomRoots = true; +} + +Constant *ShadowStackGC::GetFrameMap(Function &F) { + // doInitialization creates the abstract type of this value. + Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); + + // Truncate the ShadowStackDescriptor if some metadata is null. + unsigned NumMeta = 0; + SmallVector<Constant*, 16> Metadata; + for (unsigned I = 0; I != Roots.size(); ++I) { + Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); + if (!C->isNullValue()) + NumMeta = I + 1; + Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); + } + Metadata.resize(NumMeta); + + Type *Int32Ty = Type::getInt32Ty(F.getContext()); + + Constant *BaseElts[] = { + ConstantInt::get(Int32Ty, Roots.size(), false), + ConstantInt::get(Int32Ty, NumMeta, false), + }; + + Constant *DescriptorElts[] = { + ConstantStruct::get(FrameMapTy, BaseElts), + ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata) + }; + + Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()}; + StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta)); + + Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); + + // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems + // that, short of multithreaded LLVM, it should be safe; all that is + // necessary is that a simple Module::iterator loop not be invalidated. 
+ // Appending to the GlobalVariable list is safe in that sense. + // + // All of the output passes emit globals last. The ExecutionEngine + // explicitly supports adding globals to the module after + // initialization. + // + // Still, if it isn't deemed acceptable, then this transformation needs + // to be a ModulePass (which means it cannot be in the 'llc' pipeline + // (which uses a FunctionPassManager (which segfaults (not asserts) if + // provided a ModulePass))). + Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, + GlobalVariable::InternalLinkage, + FrameMap, "__gc_" + F.getName()); + + Constant *GEPIndices[2] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) + }; + return ConstantExpr::getGetElementPtr(GV, GEPIndices); +} + +Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { + // doInitialization creates the generic version of this type. + std::vector<Type*> EltTys; + EltTys.push_back(StackEntryTy); + for (size_t I = 0; I != Roots.size(); I++) + EltTys.push_back(Roots[I].second->getAllocatedType()); + + return StructType::create(EltTys, "gc_stackentry."+F.getName().str()); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. If +/// not, exit fast. +bool ShadowStackGC::initializeCustomLowering(Module &M) { + // struct FrameMap { + // int32_t NumRoots; // Number of roots in stack frame. + // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. + // void *Meta[]; // May be absent for roots without metadata. + // }; + std::vector<Type*> EltTys; + // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::getInt32Ty(M.getContext())); + // Specifies length of variable length array. + EltTys.push_back(Type::getInt32Ty(M.getContext())); + FrameMapTy = StructType::create(EltTys, "gc_map"); + PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); + + // struct StackEntry { + // ShadowStackEntry *Next; // Caller's stack entry. + // FrameMap *Map; // Pointer to constant FrameMap. + // void *Roots[]; // Stack roots (in-place array, so we pretend). + // }; + + StackEntryTy = StructType::create(M.getContext(), "gc_stackentry"); + + EltTys.clear(); + EltTys.push_back(PointerType::getUnqual(StackEntryTy)); + EltTys.push_back(FrameMapPtrTy); + StackEntryTy->setBody(EltTys); + PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + + // Get the root chain if it already exists. + Head = M.getGlobalVariable("llvm_gc_root_chain"); + if (!Head) { + // If the root chain does not exist, insert a new one with linkonce + // linkage! + Head = new GlobalVariable(M, StackEntryPtrTy, false, + GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(StackEntryPtrTy), + "llvm_gc_root_chain"); + } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { + Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); + Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); + } + + return true; +} + +bool ShadowStackGC::IsNullValue(Value *V) { + if (Constant *C = dyn_cast<Constant>(V)) + return C->isNullValue(); + return false; +} + +void ShadowStackGC::CollectRoots(Function &F) { + // FIXME: Account for original alignment. Could fragment the root array. + // Approach 1: Null initialize empty slots at runtime. Yuck. + // Approach 2: Emit a map of the array instead of just a count. 
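+  // (Neither approach is implemented here; roots are simply laid out in the
+  // order collected below, with metadata-bearing roots first so that the
+  // FrameMap::Meta array can be truncated.)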
+ + assert(Roots.empty() && "Not cleaned up?"); + + SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::gcroot) { + std::pair<CallInst*, AllocaInst*> Pair = std::make_pair( + CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + if (IsNullValue(CI->getArgOperand(1))) + Roots.push_back(Pair); + else + MetaRoots.push_back(Pair); + } + + // Number roots with metadata (usually empty) at the beginning, so that the + // FrameMap::Meta array can be elided. + Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst * +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, + int Idx, int Idx2, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx), + ConstantInt::get(Type::getInt32Ty(Context), Idx2) }; + Value* Val = B.CreateGEP(BasePtr, Indices, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +GetElementPtrInst * +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, + int Idx, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx) }; + Value *Val = B.CreateGEP(BasePtr, Indices, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +/// runOnFunction - Insert code to maintain the shadow stack. +bool ShadowStackGC::performCustomLowering(Function &F) { + LLVMContext &Context = F.getContext(); + + // Find calls to llvm.gcroot. + CollectRoots(F); + + // If there are no roots in this function, then there is no need to add a + // stack map entry for it. + if (Roots.empty()) + return false; + + // Build the constant map and figure the type of the shadow stack entry. + Value *FrameMap = GetFrameMap(F); + Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + + // Build the shadow stack entry at the very start of the function. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + IRBuilder<> AtEntry(IP->getParent(), IP); + + Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0, + "gc_frame"); + + while (isa<AllocaInst>(IP)) ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Initialize the map pointer and load the current head of the shadow stack. + Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, + 0,1,"gc_frame.map"); + AtEntry.CreateStore(FrameMap, EntryMapPtr); + + // After all the allocas... + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + // For each root, find the corresponding slot in the aggregate... + Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); + + // And use it in lieu of the alloca. + AllocaInst *OriginalAlloca = Roots[I].second; + SlotPtr->takeName(OriginalAlloca); + OriginalAlloca->replaceAllUsesWith(SlotPtr); + } + + // Move past the original stores inserted by GCStrategy::InitRoots. 
This isn't
+  // really necessary (the collector would never see the intermediate state at
+  // runtime), but it's nicer not to push the half-initialized entry onto the
+  // shadow stack.
+  while (isa<StoreInst>(IP)) ++IP;
+  AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+  // Push the entry onto the shadow stack.
+  Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
+                                        StackEntry, 0, 0, "gc_frame.next");
+  Instruction *NewHeadVal = CreateGEP(Context, AtEntry,
+                                      StackEntry, 0, "gc_newhead");
+  AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+  AtEntry.CreateStore(NewHeadVal, Head);
+
+  // For each instruction that escapes...
+  EscapeEnumerator EE(F, "gc_cleanup");
+  while (IRBuilder<> *AtExit = EE.Next()) {
+    // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+    // AtEntry, since that would make the value live for the entire function.
+    Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
+                                           "gc_frame.next");
+    Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+    AtExit->CreateStore(SavedHead, Head);
+  }
+
+  // Delete the original allocas (which are no longer used) and the intrinsic
+  // calls (which are no longer valid). Doing this last avoids invalidating
+  // iterators.
+  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+    Roots[I].first->eraseFromParent();
+    Roots[I].second->eraseFromParent();
+  }
+
+  Roots.clear();
+  return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 0000000..9ab4918
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1152 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+//   machine CFG to tightly surround their uses so that execution paths that
+//   do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+//   loop, the spills are placed in the loop preheader, and restores are
+//   placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+//   If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+//   the use info for the CSRs inside the region is propagated outward in the
+//   CFG to ensure validity of the spill/restore placements. This decreases
+//   the effectiveness of shrink wrapping but does not require edge splitting
+//   in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
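+//
+// Placement is driven by four data-flow sets per block, computed to a fixed
+// point (a summary of the update rules used in calcAnticInOut and
+// calcAvailInOut below):
+//
+//   AnticOut[B] = INTERSECT(AnticIn[S])  for all successors S of B
+//   AnticIn[B]  = CSRUsed[B] U AnticOut[B]
+//   AvailIn[B]  = INTERSECT(AvailOut[P]) for all predecessors P of B
+//   AvailOut[B] = CSRUsed[B] U AvailIn[B]
+//
+// Spills go where a CSR first becomes anticipated but is not yet available;
+// restores go where it was available but is no longer anticipated.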
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+               cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+               cl::desc("Shrink wrap the specified function"),
+               cl::value_desc("funcname"),
+               cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+  None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+  cl::desc("Print shrink wrapping debugging information"),
+  cl::values(
+    clEnumVal(None      , "disable debug output"),
+    clEnumVal(BasicInfo , "print basic DF sets"),
+    clEnumVal(Iterations, "print SR sets for each iteration"),
+    clEnumVal(Details   , "print all DF sets"),
+    clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  if (ShrinkWrapping || ShrinkWrapFunc != "") {
+    AU.addRequired<MachineLoopInfo>();
+    AU.addRequired<MachineDominatorTree>();
+  }
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<TargetPassConfig>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+//  ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+  assert(LP && "Machine loop is NULL.");
+  MachineBasicBlock* PHDR = LP->getLoopPreheader();
+  MachineLoop* PLP = LP->getParentLoop();
+  while (PLP) {
+    PHDR = PLP->getLoopPreheader();
+    PLP = PLP->getParentLoop();
+  }
+  return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+  if (LP == 0)
+    return 0;
+  MachineLoop* PLP = LP->getParentLoop();
+  while (PLP) {
+    LP = PLP;
+    PLP = PLP->getParentLoop();
+  }
+  return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+  return (MBB && !MBB->empty() && MBB->back().isReturn());
+}
+
+// Initialize shrink wrapping DFA sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+  AnticIn.clear();
+  AnticOut.clear();
+  AvailIn.clear();
+  AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+  ReturnBlocks.clear();
+  clearAnticAvailSets();
+  UsedCSRegs.clear();
+  CSRUsed.clear();
+  TLLoops.clear();
+  CSRSave.clear();
+  CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() { + clearAllSets(); + EntryBlock = 0; +#ifndef NDEBUG + HasFastExitPath = false; +#endif + ShrinkWrapThisFunction = ShrinkWrapping; + // DEBUG: enable or disable shrink wrapping for the current function + // via --shrink-wrap-func=<funcname>. +#ifndef NDEBUG + if (ShrinkWrapFunc != "") { + std::string MFName = MF->getName().str(); + ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); + } +#endif +} + + +/// placeCSRSpillsAndRestores - determine which MBBs of the function +/// need save, restore code for callee-saved registers by doing a DF analysis +/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs +/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo +/// is used to ensure that CSR save/restore code is not placed inside loops. +/// This function computes the maps of MBBs -> CSRs to spill and restore +/// in CSRSave, CSRRestore. +/// +/// If shrink wrapping is not being performed, place all spills in +/// the entry block, all restores in return blocks. In this case, +/// CSRSave has a single mapping, CSRRestore has mappings for each +/// return block. +/// +void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { + + DEBUG(MF = &Fn); + + initShrinkWrappingInfo(); + + DEBUG(if (ShrinkWrapThisFunction) { + dbgs() << "Place CSR spills/restores for " + << MF->getName() << "\n"; + }); + + if (calculateSets(Fn)) + placeSpillsAndRestores(Fn); +} + +/// calcAnticInOut - calculate the anticipated in/out reg sets +/// for the given MBB by looking forward in the MCFG at MBB's +/// successors. +/// +bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { + bool changed = false; + + // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) + SmallVector<MachineBasicBlock*, 4> successors; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC != MBB) + successors.push_back(SUCC); + } + + unsigned i = 0, e = successors.size(); + if (i != e) { + CSRegSet prevAnticOut = AnticOut[MBB]; + MachineBasicBlock* SUCC = successors[i]; + + AnticOut[MBB] = AnticIn[SUCC]; + for (++i; i != e; ++i) { + SUCC = successors[i]; + AnticOut[MBB] &= AnticIn[SUCC]; + } + if (prevAnticOut != AnticOut[MBB]) + changed = true; + } + + // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); + CSRegSet prevAnticIn = AnticIn[MBB]; + AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; + if (prevAnticIn != AnticIn[MBB]) + changed = true; + return changed; +} + +/// calcAvailInOut - calculate the available in/out reg sets +/// for the given MBB by looking backward in the MCFG at MBB's +/// predecessors. 
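+/// (Update rules, matching the comments in the body below:
+///    AvailIn[MBB]  = INTERSECT(AvailOut[P]) over predecessors P of MBB;
+///    AvailOut[MBB] = CSRUsed[MBB] U AvailIn[MBB].)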
+/// +bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { + bool changed = false; + + // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) + SmallVector<MachineBasicBlock*, 4> predecessors; + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + if (PRED != MBB) + predecessors.push_back(PRED); + } + + unsigned i = 0, e = predecessors.size(); + if (i != e) { + CSRegSet prevAvailIn = AvailIn[MBB]; + MachineBasicBlock* PRED = predecessors[i]; + + AvailIn[MBB] = AvailOut[PRED]; + for (++i; i != e; ++i) { + PRED = predecessors[i]; + AvailIn[MBB] &= AvailOut[PRED]; + } + if (prevAvailIn != AvailIn[MBB]) + changed = true; + } + + // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); + CSRegSet prevAvailOut = AvailOut[MBB]; + AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; + if (prevAvailOut != AvailOut[MBB]) + changed = true; + return changed; +} + +/// calculateAnticAvail - build the sets anticipated and available +/// registers in the MCFG of the current function iteratively, +/// doing a combined forward and backward analysis. +/// +void PEI::calculateAnticAvail(MachineFunction &Fn) { + // Initialize data flow sets. + clearAnticAvailSets(); + + // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. + bool changed = true; + unsigned iterations = 0; + while (changed) { + changed = false; + ++iterations; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + + // Calculate anticipated in, out regs at MBB from + // anticipated at successors of MBB. + changed |= calcAnticInOut(MBB); + + // Calculate available in, out regs at MBB from + // available at predecessors of MBB. + changed |= calcAvailInOut(MBB); + } + } + + DEBUG({ + if (ShrinkWrapDebugging >= Details) { + dbgs() + << "-----------------------------------------------------------\n" + << " Antic/Avail Sets:\n" + << "-----------------------------------------------------------\n" + << "iterations = " << iterations << "\n" + << "-----------------------------------------------------------\n" + << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" + << "-----------------------------------------------------------\n"; + + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets(MBB); + } + + dbgs() + << "-----------------------------------------------------------\n"; + } + }); +} + +/// propagateUsesAroundLoop - copy used register info from MBB to all blocks +/// of the loop given by LP and its parent loops. This prevents spills/restores +/// from being placed in the bodies of loops. +/// +void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { + if (! MBB || !LP) + return; + + std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); + for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { + MachineBasicBlock* LBB = loopBlocks[i]; + if (LBB == MBB) + continue; + if (CSRUsed[LBB].contains(CSRUsed[MBB])) + continue; + CSRUsed[LBB] |= CSRUsed[MBB]; + } +} + +/// calculateSets - collect the CSRs used in this function, compute +/// the DF sets that describe the initial minimal regions in the +/// Machine CFG around which CSR spills and restores must be placed. +/// +/// Additionally, this function decides if shrink wrapping should +/// be disabled for the current function, checking the following: +/// 1. 
the current function has more than 500 MBBs: heuristic limit +/// on function size to reduce compile time impact of the current +/// iterative algorithm. +/// 2. all CSRs are used in the entry block. +/// 3. all CSRs are used in all immediate successors of the entry block. +/// 4. all CSRs are used in a subset of blocks, each of which dominates +/// all return blocks. These blocks, taken as a subgraph of the MCFG, +/// are equivalent to the entry block since all execution paths pass +/// through them. +/// +bool PEI::calculateSets(MachineFunction &Fn) { + // Sets used to compute spill, restore placement sets. + const std::vector<CalleeSavedInfo> CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + // If no CSRs used, we are done. + if (CSI.empty()) { + DEBUG(if (ShrinkWrapThisFunction) + dbgs() << "DISABLED: " << Fn.getName() + << ": uses no callee-saved registers\n"); + return false; + } + + // Save refs to entry and return blocks. + EntryBlock = Fn.begin(); + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) + if (isReturnBlock(MBB)) + ReturnBlocks.push_back(MBB); + + // Determine if this function has fast exit paths. + DEBUG(if (ShrinkWrapThisFunction) + findFastExitPath()); + + // Limit shrink wrapping via the current iterative bit vector + // implementation to functions with <= 500 MBBs. + if (Fn.size() > 500) { + DEBUG(if (ShrinkWrapThisFunction) + dbgs() << "DISABLED: " << Fn.getName() + << ": too large (" << Fn.size() << " MBBs)\n"); + ShrinkWrapThisFunction = false; + } + + // Return now if not shrink wrapping. + if (! ShrinkWrapThisFunction) + return false; + + // Collect set of used CSRs. + for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + UsedCSRegs.set(inx); + } + + // Walk instructions in all MBBs, create CSRUsed[] sets, choose + // whether or not to shrink wrap this function. + MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + + bool allCSRUsesInEntryBlock = true; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { + for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + unsigned Reg = CSI[inx].getReg(); + // If instruction I reads or modifies Reg, add it to UsedCSRegs, + // CSRUsed map for the current block. + for (unsigned opInx = 0, opEnd = I->getNumOperands(); + opInx != opEnd; ++opInx) { + const MachineOperand &MO = I->getOperand(opInx); + if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MOReg == Reg || + (TargetRegisterInfo::isPhysicalRegister(MOReg) && + TargetRegisterInfo::isPhysicalRegister(Reg) && + TRI->isSubRegister(Reg, MOReg))) { + // CSR Reg is defined/used in block MBB. + CSRUsed[MBB].set(inx); + // Check for uses in EntryBlock. + if (MBB != EntryBlock) + allCSRUsesInEntryBlock = false; + } + } + } + } + + if (CSRUsed[MBB].empty()) + continue; + + // Propagate CSRUsed[MBB] in loops + if (MachineLoop* LP = LI.getLoopFor(MBB)) { + // Add top level loop to work list. + MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); + MachineLoop* PLP = getTopLevelLoopParent(LP); + + if (! 
HDR) { + HDR = PLP->getHeader(); + assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); + MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); + HDR = *PI; + } + TLLoops[HDR] = PLP; + + // Push uses from inside loop to its parent loops, + // or to all other MBBs in its loop. + if (LP->getLoopDepth() > 1) { + for (MachineLoop* PLP = LP->getParentLoop(); PLP; + PLP = PLP->getParentLoop()) { + propagateUsesAroundLoop(MBB, PLP); + } + } else { + propagateUsesAroundLoop(MBB, LP); + } + } + } + + if (allCSRUsesInEntryBlock) { + DEBUG(dbgs() << "DISABLED: " << Fn.getName() + << ": all CSRs used in EntryBlock\n"); + ShrinkWrapThisFunction = false; + } else { + bool allCSRsUsedInEntryFanout = true; + for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), + SE = EntryBlock->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (CSRUsed[SUCC] != UsedCSRegs) + allCSRsUsedInEntryFanout = false; + } + if (allCSRsUsedInEntryFanout) { + DEBUG(dbgs() << "DISABLED: " << Fn.getName() + << ": all CSRs used in imm successors of EntryBlock\n"); + ShrinkWrapThisFunction = false; + } + } + + if (ShrinkWrapThisFunction) { + // Check if MBB uses CSRs and dominates all exit nodes. + // Such nodes are equiv. to the entry node w.r.t. + // CSR uses: every path through the function must + // pass through this node. If each CSR is used at least + // once by these nodes, shrink wrapping is disabled. + CSRegSet CSRUsedInChokePoints; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) + continue; + bool dominatesExitNodes = true; + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + if (! DT.dominates(MBB, ReturnBlocks[ri])) { + dominatesExitNodes = false; + break; + } + if (dominatesExitNodes) { + CSRUsedInChokePoints |= CSRUsed[MBB]; + if (CSRUsedInChokePoints == UsedCSRegs) { + DEBUG(dbgs() << "DISABLED: " << Fn.getName() + << ": all CSRs used in choke point(s) at " + << getBasicBlockName(MBB) << "\n"); + ShrinkWrapThisFunction = false; + break; + } + } + } + } + + // Return now if we have decided not to apply shrink wrapping + // to the current function. + if (! ShrinkWrapThisFunction) + return false; + + DEBUG({ + dbgs() << "ENABLED: " << Fn.getName(); + if (HasFastExitPath) + dbgs() << " (fast exit path)"; + dbgs() << "\n"; + if (ShrinkWrapDebugging >= BasicInfo) { + dbgs() << "------------------------------" + << "-----------------------------\n"; + dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; + if (ShrinkWrapDebugging >= Details) { + dbgs() << "------------------------------" + << "-----------------------------\n"; + dumpAllUsed(); + } + } + }); + + // Build initial DF sets to determine minimal regions in the + // Machine CFG around which CSRs must be spilled and restored. + calculateAnticAvail(Fn); + + return true; +} + +/// addUsesForMEMERegion - add uses of CSRs spilled or restored in +/// multi-entry, multi-exit (MEME) regions so spill and restore +/// placement will not break code that enters or leaves a +/// shrink-wrapped region by inducing spills with no matching +/// restores or restores with no matching spills. A MEME region +/// is a subgraph of the MCFG with multiple entry edges, multiple +/// exit edges, or both. 
This code propagates use information +/// through the MCFG until all paths requiring spills and restores +/// _outside_ the computed minimal placement regions have been covered. +/// +bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, + SmallVector<MachineBasicBlock*, 4>& blks) { + if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { + bool processThisBlock = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC->pred_size() > 1) { + processThisBlock = true; + break; + } + } + if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + if (PRED->succ_size() > 1) { + processThisBlock = true; + break; + } + } + } + if (! processThisBlock) + return false; + } + + CSRegSet prop; + if (!CSRSave[MBB].empty()) + prop = CSRSave[MBB]; + else if (!CSRRestore[MBB].empty()) + prop = CSRRestore[MBB]; + else + prop = CSRUsed[MBB]; + if (prop.empty()) + return false; + + // Propagate selected bits to successors, predecessors of MBB. + bool addedUses = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + // Self-loop + if (SUCC == MBB) + continue; + if (! CSRUsed[SUCC].contains(prop)) { + CSRUsed[SUCC] |= prop; + addedUses = true; + blks.push_back(SUCC); + DEBUG(if (ShrinkWrapDebugging >= Iterations) + dbgs() << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(prop) << ")->" + << "successor " << getBasicBlockName(SUCC) << "\n"); + } + } + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + // Self-loop + if (PRED == MBB) + continue; + if (! CSRUsed[PRED].contains(prop)) { + CSRUsed[PRED] |= prop; + addedUses = true; + blks.push_back(PRED); + DEBUG(if (ShrinkWrapDebugging >= Iterations) + dbgs() << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(prop) << ")->" + << "predecessor " << getBasicBlockName(PRED) << "\n"); + } + } + return addedUses; +} + +/// addUsesForTopLevelLoops - add uses for CSRs used inside top +/// level loops to the exit blocks of those loops. +/// +bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { + bool addedUses = false; + + // Place restores for top level loops where needed. + for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator + I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { + MachineBasicBlock* MBB = I->first; + MachineLoop* LP = I->second; + MachineBasicBlock* HDR = LP->getHeader(); + SmallVector<MachineBasicBlock*, 4> exitBlocks; + CSRegSet loopSpills; + + loopSpills = CSRSave[MBB]; + if (CSRSave[MBB].empty()) { + loopSpills = CSRUsed[HDR]; + assert(!loopSpills.empty() && "No CSRs used in loop?"); + } else if (CSRRestore[MBB].contains(CSRSave[MBB])) + continue; + + LP->getExitBlocks(exitBlocks); + assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); + for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { + MachineBasicBlock* EXB = exitBlocks[i]; + if (! 
CSRUsed[EXB].contains(loopSpills)) {
+        CSRUsed[EXB] |= loopSpills;
+        addedUses = true;
+        DEBUG(if (ShrinkWrapDebugging >= Iterations)
+                dbgs() << "LOOP " << getBasicBlockName(MBB)
+                       << "(" << stringifyCSRegSet(loopSpills) << ")->"
+                       << getBasicBlockName(EXB) << "\n");
+        if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
+          blks.push_back(EXB);
+      }
+    }
+  }
+  return addedUses;
+}
+
+/// calcSpillPlacements - determine which CSRs should be spilled
+/// in MBB using AnticIn sets of MBB's predecessors, keeping track
+/// of changes to spilled reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
+                              SmallVector<MachineBasicBlock*, 4> &blks,
+                              CSRegBlockMap &prevSpills) {
+  bool placedSpills = false;
+  // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
+  CSRegSet anticInPreds;
+  SmallVector<MachineBasicBlock*, 4> predecessors;
+  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+    MachineBasicBlock* PRED = *PI;
+    if (PRED != MBB)
+      predecessors.push_back(PRED);
+  }
+  unsigned i = 0, e = predecessors.size();
+  if (i != e) {
+    MachineBasicBlock* PRED = predecessors[i];
+    anticInPreds = UsedCSRegs - AnticIn[PRED];
+    for (++i; i != e; ++i) {
+      PRED = predecessors[i];
+      anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+    }
+  } else {
+    // Handle uses in entry blocks (which have no predecessors).
+    // This is necessary because the DFA formulation assumes the
+    // entry and (multiple) exit nodes cannot have CSR uses, which
+    // is not the case in the real world.
+    anticInPreds = UsedCSRegs;
+  }
+  // Compute spills required at MBB:
+  CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+  if (! CSRSave[MBB].empty()) {
+    if (MBB == EntryBlock) {
+      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+        CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
+    } else {
+      // Reset all regs spilled in MBB that are also spilled in EntryBlock.
+      if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+        CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+      }
+    }
+  }
+  placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+  prevSpills[MBB] = CSRSave[MBB];
+  // Remember this block for adding restores to successor
+  // blocks for multi-entry region.
+  if (placedSpills)
+    blks.push_back(MBB);
+
+  DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+          dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+  return placedSpills;
+}
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's successors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
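+/// (Restores are computed as
+///    CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & INTERSECT(CSRegs - AvailOut[S])
+///  over successors S, mirroring the spill computation in calcSpillPlacements.)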
+/// +bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, + SmallVector<MachineBasicBlock*, 4> &blks, + CSRegBlockMap &prevRestores) { + bool placedRestores = false; + // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) + CSRegSet availOutSucc; + SmallVector<MachineBasicBlock*, 4> successors; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC != MBB) + successors.push_back(SUCC); + } + unsigned i = 0, e = successors.size(); + if (i != e) { + MachineBasicBlock* SUCC = successors[i]; + availOutSucc = UsedCSRegs - AvailOut[SUCC]; + for (++i; i != e; ++i) { + SUCC = successors[i]; + availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); + } + } else { + if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { + // Handle uses in return blocks (which have no successors). + // This is necessary because the DFA formulation assumes the + // entry and (multiple) exit nodes cannot have CSR uses, which + // is not the case in the real world. + availOutSucc = UsedCSRegs; + } + } + // Compute restores required at MBB: + CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; + + // Postprocess restore placements at MBB. + // Remove the CSRs that are restored in the return blocks. + // Lest this be confusing, note that: + // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. + if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { + if (! CSRSave[EntryBlock].empty()) + CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; + } + placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); + prevRestores[MBB] = CSRRestore[MBB]; + // Remember this block for adding saves to predecessor + // blocks for multi-entry region. + if (placedRestores) + blks.push_back(MBB); + + DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) + dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); + + return placedRestores; +} + +/// placeSpillsAndRestores - place spills and restores of CSRs +/// used in MBBs in minimal regions that contain the uses. +/// +void PEI::placeSpillsAndRestores(MachineFunction &Fn) { + CSRegBlockMap prevCSRSave; + CSRegBlockMap prevCSRRestore; + SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks; + bool changed = true; + unsigned iterations = 0; + + // Iterate computation of spill and restore placements in the MCFG until: + // 1. CSR use info has been fully propagated around the MCFG, and + // 2. computation of CSRSave[], CSRRestore[] reach fixed points. + while (changed) { + changed = false; + ++iterations; + + DEBUG(if (ShrinkWrapDebugging >= Iterations) + dbgs() << "iter " << iterations + << " --------------------------------------------------\n"); + + // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, + // which determines the placements of spills and restores. + // Keep track of changes to spills, restores in each iteration to + // minimize the total iterations. + bool SRChanged = false; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + + // Place spills for CSRs in MBB. + SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); + + // Place restores for CSRs in MBB. + SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); + } + + // Add uses of CSRs used inside loops where needed. 
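+    // (cvBlocks holds the blocks whose save/restore sets just changed; they
+    // seed the MEME-region propagation below, which in turn queues the next
+    // wave of blocks in ncvBlocks.)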
+    changed |= addUsesForTopLevelLoops(cvBlocks);
+
+    // Add uses for CSRs spilled or restored at branch, join points.
+    if (changed || SRChanged) {
+      while (! cvBlocks.empty()) {
+        MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+        changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+      }
+      if (! ncvBlocks.empty()) {
+        cvBlocks = ncvBlocks;
+        ncvBlocks.clear();
+      }
+    }
+
+    if (changed) {
+      calculateAnticAvail(Fn);
+      CSRSave.clear();
+      CSRRestore.clear();
+    }
+  }
+
+  // Check for effectiveness:
+  //  SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+  //  numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+  // Gives a measure of how many CSR spills have been moved from EntryBlock
+  // to minimal regions enclosing their uses.
+  CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+  unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+  numSRReduced += numSRReducedThisFunc;
+  DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+      dbgs() << "-----------------------------------------------------------\n";
+      dbgs() << "total iterations = " << iterations << " ( "
+             << Fn.getName()
+             << " " << numSRReducedThisFunc
+             << " " << Fn.size()
+             << " )\n";
+      dbgs() << "-----------------------------------------------------------\n";
+      dumpSRSets();
+      dbgs() << "-----------------------------------------------------------\n";
+      if (numSRReducedThisFunc)
+        verifySpillRestorePlacement();
+    });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// with at least one path from the entry block to a return block
+/// directly or which has a very small number of edges.
+///
+void PEI::findFastExitPath() {
+  if (! EntryBlock)
+    return;
+  // Find a path from EntryBlock to any return block that does not branch:
+  //        Entry
+  //          |     ...
+  //          v      |
+  //          B1<-----+
+  //          |
+  //          v
+  //       Return
+  for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+         SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock* SUCC = *SI;
+
+    // Assume positive, disprove existence of fast path.
+    HasFastExitPath = true;
+
+    // Check the immediate successors.
+    if (isReturnBlock(SUCC)) {
+      if (ShrinkWrapDebugging >= BasicInfo)
+        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+               << "->" << getBasicBlockName(SUCC) << "\n";
+      break;
+    }
+    // Traverse df from SUCC, look for a branch block.
+    std::string exitPath = getBasicBlockName(SUCC);
+    for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+           BE = df_end(SUCC); BI != BE; ++BI) {
+      MachineBasicBlock* SBB = *BI;
+      // Reject paths with branch nodes.
+      if (SBB->succ_size() > 1) {
+        HasFastExitPath = false;
+        break;
+      }
+      exitPath += "->" + getBasicBlockName(SBB);
+    }
+    if (HasFastExitPath) {
+      if (ShrinkWrapDebugging >= BasicInfo)
+        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+               << "->" << exitPath << "\n";
+      break;
+    }
+  }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+///         all CSRs spilled at MBB are restored on all paths
+///         from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+///           all CSRs restored at MBB are spilled on all paths
+///           reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+  unsigned numReturnBlocks = 0;
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    MachineBasicBlock* MBB = MBBI;
+    if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+      ++numReturnBlocks;
+  }
+  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+         BE = CSRSave.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet spilled = BI->second;
+    CSRegSet restored;
+
+    if (spilled.empty())
+      continue;
+
+    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(spilled)
+                 << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+    if (CSRRestore[MBB].intersects(spilled)) {
+      restored |= (CSRRestore[MBB] & spilled);
+    }
+
+    // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+    // we must find restores for all spills w/no intervening spills on all
+    // paths from MBB to all return blocks.
+    for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+           BE = df_end(MBB); BI != BE; ++BI) {
+      MachineBasicBlock* SBB = *BI;
+      if (SBB == MBB)
+        continue;
+      // Stop when we encounter spills of any CSRs spilled at MBB that
+      // have not yet been seen to be restored.
+      if (CSRSave[SBB].intersects(spilled) &&
+          !restored.contains(CSRSave[SBB] & spilled))
+        break;
+      // Collect the CSRs spilled at MBB that are restored
+      // at this DF successor of MBB.
+      if (CSRRestore[SBB].intersects(spilled))
+        restored |= (CSRRestore[SBB] & spilled);
+      // If we are at a return block, check that the restores
+      // we have seen so far exhaust the spills at MBB, then
+      // reset the restores.
+      if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+        if (restored != spilled) {
+          CSRegSet notRestored = (spilled - restored);
+          DEBUG(dbgs() << MF->getName() << ": "
+                       << stringifyCSRegSet(notRestored)
+                       << " spilled at " << getBasicBlockName(MBB)
+                       << " are never restored on path to return "
+                       << getBasicBlockName(SBB) << "\n");
+        }
+        restored.clear();
+      }
+    }
+  }
+
+  // Check restore placements.
+  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+         BE = CSRRestore.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet restored = BI->second;
+    CSRegSet spilled;
+
+    if (restored.empty())
+      continue;
+
+    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(CSRSave[MBB])
+                 << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(restored) << "\n");
+
+    if (CSRSave[MBB].intersects(restored)) {
+      spilled |= (CSRSave[MBB] & restored);
+    }
+    // Walk inverse depth first from MBB to find spills of all
+    // CSRs restored at MBB:
+    for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
+           BE = idf_end(MBB); BI != BE; ++BI) {
+      MachineBasicBlock* PBB = *BI;
+      if (PBB == MBB)
+        continue;
+      // Stop when we encounter restores of any CSRs restored at MBB that
+      // have not yet been seen to be spilled.
+      if (CSRRestore[PBB].intersects(restored) &&
+          !spilled.contains(CSRRestore[PBB] & restored))
+        break;
+      // Collect the CSRs restored at MBB that are spilled
+      // at this DF predecessor of MBB.
+      if (CSRSave[PBB].intersects(restored))
+        spilled |= (CSRSave[PBB] & restored);
+    }
+    if (spilled != restored) {
+      CSRegSet notSpilled = (restored - spilled);
+      DEBUG(dbgs() << MF->getName() << ": "
+                   << stringifyCSRegSet(notSpilled)
+                   << " restored at " << getBasicBlockName(MBB)
+                   << " are never spilled\n");
+    }
+  }
+}
+
+// Debugging print methods.
+std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { + if (!MBB) + return ""; + + if (MBB->getBasicBlock()) + return MBB->getBasicBlock()->getName().str(); + + std::ostringstream name; + name << "_MBB_" << MBB->getNumber(); + return name.str(); +} + +std::string PEI::stringifyCSRegSet(const CSRegSet& s) { + const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + const std::vector<CalleeSavedInfo> CSI = + MF->getFrameInfo()->getCalleeSavedInfo(); + + std::ostringstream srep; + if (CSI.size() == 0) { + srep << "[]"; + return srep.str(); + } + srep << "["; + CSRegSet::iterator I = s.begin(), E = s.end(); + if (I != E) { + unsigned reg = CSI[*I].getReg(); + srep << TRI->getName(reg); + for (++I; I != E; ++I) { + reg = CSI[*I].getReg(); + srep << ","; + srep << TRI->getName(reg); + } + } + srep << "]"; + return srep.str(); +} + +void PEI::dumpSet(const CSRegSet& s) { + DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); +} + +void PEI::dumpUsed(MachineBasicBlock* MBB) { + DEBUG({ + if (MBB) + dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; + }); +} + +void PEI::dumpAllUsed() { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpUsed(MBB); + } +} + +void PEI::dumpSets(MachineBasicBlock* MBB) { + DEBUG({ + if (MBB) + dbgs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << "\n"; + }); +} + +void PEI::dumpSets1(MachineBasicBlock* MBB) { + DEBUG({ + if (MBB) + dbgs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << " | " + << stringifyCSRegSet(CSRSave[MBB]) << " | " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + }); +} + +void PEI::dumpAllSets() { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets1(MBB); + } +} + +void PEI::dumpSRSets() { + DEBUG({ + for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); + MBB != E; ++MBB) { + if (!CSRSave[MBB].empty()) { + dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]); + if (CSRRestore[MBB].empty()) + dbgs() << '\n'; + } + + if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) + dbgs() << " " + << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + } + }); +} +#endif diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp new file mode 100644 index 0000000..3903743 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -0,0 +1,502 @@ +//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which use SjLj +// based exception handling. 
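+//
+// In outline (a summary of the lowering below, not a specification): each
+// function that may unwind gets a stack-allocated function context
+// (FunctionContextTy), which is registered with the runtime through
+// _Unwind_SjLj_Register on entry and unregistered on exit. A call-site
+// number is stored into the context before each potentially-throwing call
+// so the personality routine can dispatch to the correct landing pad.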
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sjljehprepare"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+  class SjLjEHPrepare : public FunctionPass {
+    const TargetLoweringBase *TLI;
+    Type *FunctionContextTy;
+    Constant *RegisterFn;
+    Constant *UnregisterFn;
+    Constant *BuiltinSetjmpFn;
+    Constant *FrameAddrFn;
+    Constant *StackAddrFn;
+    Constant *StackRestoreFn;
+    Constant *LSDAAddrFn;
+    Value *PersonalityFn;
+    Constant *CallSiteFn;
+    Constant *FuncCtxFn;
+    AllocaInst *FuncCtx;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL)
+      : FunctionPass(ID), TLI(tli) { }
+    bool doInitialization(Module &M);
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
+    const char *getPassName() const {
+      return "SJLJ Exception Handling preparation";
+    }
+
+  private:
+    bool setupEntryBlockAndCallSites(Function &F);
+    void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+                              Value *SelVal);
+    Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
+    void lowerIncomingArguments(Function &F);
+    void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
+    void insertCallSiteStore(Instruction *I, int Number);
+  };
+} // end anonymous namespace
+
+char SjLjEHPrepare::ID = 0;
+
+// Public interface to the SjLjEHPrepare pass.
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) {
+  return new SjLjEHPrepare(TLI);
+}
+
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPrepare::doInitialization(Module &M) {
+  // Build the function context structure.
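+  // (Roughly, in C terms — an illustration only, not a definition used
+  // anywhere in the compiler:
+  //   struct SjLjFunctionContext {
+  //     void    *__prev;        // chain of registered contexts
+  //     int32_t  call_site;     // index of the active call site
+  //     int32_t  __data[4];     // exception value and selector
+  //     void    *__personality; // personality routine
+  //     void    *__lsda;        // language-specific data area
+  //     void    *__jbuf[5];     // builtin_setjmp buffer
+  //   };)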
+  // builtin_setjmp uses a five word jbuf
+  Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+  Type *Int32Ty = Type::getInt32Ty(M.getContext());
+  FunctionContextTy =
+    StructType::get(VoidPtrTy,                        // __prev
+                    Int32Ty,                          // call_site
+                    ArrayType::get(Int32Ty, 4),       // __data
+                    VoidPtrTy,                        // __personality
+                    VoidPtrTy,                        // __lsda
+                    ArrayType::get(VoidPtrTy, 5),     // __jbuf
+                    NULL);
+  RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
+                                     Type::getVoidTy(M.getContext()),
+                                     PointerType::getUnqual(FunctionContextTy),
+                                     (Type *)0);
+  UnregisterFn =
+    M.getOrInsertFunction("_Unwind_SjLj_Unregister",
+                          Type::getVoidTy(M.getContext()),
+                          PointerType::getUnqual(FunctionContextTy),
+                          (Type *)0);
+  FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+  StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+  StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+  BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+  LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+  CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+  FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
+  PersonalityFn = 0;
+
+  return true;
+}
+
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context.
+void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
+  IRBuilder<> Builder(I);
+
+  // Get a reference to the call_site field.
+  Type *Int32Ty = Type::getInt32Ty(I->getContext());
+  Value *Zero = ConstantInt::get(Int32Ty, 0);
+  Value *One = ConstantInt::get(Int32Ty, 1);
+  Value *Idxs[2] = { Zero, One };
+  Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
+
+  // Insert a store of the call-site number.
+  ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+                                              Number);
+  Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/);
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB,
+                             SmallPtrSet<BasicBlock*, 64> &LiveBBs) {
+  if (!LiveBBs.insert(BB)) return; // already been here.
+
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+/// substituteLPadValues - Substitute the values returned by the landingpad
+/// instruction with those returned by the personality function.
+void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+                                         Value *SelVal) {
+  SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
+  while (!UseWorkList.empty()) {
+    Value *Val = UseWorkList.pop_back_val();
+    ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
+    if (!EVI) continue;
+    if (EVI->getNumIndices() != 1) continue;
+    if (*EVI->idx_begin() == 0)
+      EVI->replaceAllUsesWith(ExnVal);
+    else if (*EVI->idx_begin() == 1)
+      EVI->replaceAllUsesWith(SelVal);
+    if (EVI->getNumUses() == 0)
+      EVI->eraseFromParent();
+  }
+
+  if (LPI->getNumUses() == 0) return;
+
+  // There are still some uses of LPI. Construct an aggregate with the exception
+  // values and replace the LPI with that aggregate.
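+  // (Illustrative IR for a landingpad of type { i8*, i32 }; the value names
+  // are made up for exposition:
+  //   %lv0 = insertvalue { i8*, i32 } undef, i8* %exn_val, 0
+  //   %lv1 = insertvalue { i8*, i32 } %lv0, i32 %exn_selector_val, 1
+  // and %lv1 then replaces all remaining uses of the landingpad.)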
+ Type *LPadType = LPI->getType(); + Value *LPadVal = UndefValue::get(LPadType); + IRBuilder<> + Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); + LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); + + LPI->replaceAllUsesWith(LPadVal); +} + +/// setupFunctionContext - Allocate the function context on the stack and fill +/// it with all of the data that we know at this point. +Value *SjLjEHPrepare:: +setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { + BasicBlock *EntryBB = F.begin(); + + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an alloca + // because the value needs to be added to the global context list. + unsigned Align = + TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); + FuncCtx = + new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); + + // Fill in the function context structure. + for (unsigned I = 0, E = LPads.size(); I != E; ++I) { + LandingPadInst *LPI = LPads[I]; + IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); + + // Reference the __data field. + Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); + + // The exception values come back in context->__data[0]. + Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0, + "exception_gep"); + Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); + ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); + + Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1, + "exn_selector_gep"); + Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); + + substituteLPadValues(LPI, ExnVal, SelVal); + } + + // Personality function + IRBuilder<> Builder(EntryBB->getTerminator()); + if (!PersonalityFn) + PersonalityFn = LPads[0]->getPersonalityFn(); + Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, + "pers_fn_gep"); + Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); + + // LSDA address + Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); + Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep"); + Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true); + + return FuncCtx; +} + +/// lowerIncomingArguments - To avoid having to handle incoming arguments +/// specially, we lower each arg to a copy instruction in the entry block. This +/// ensures that the argument value itself cannot be live out of the entry +/// block. +void SjLjEHPrepare::lowerIncomingArguments(Function &F) { + BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); + while (isa<AllocaInst>(AfterAllocaInsPt) && + isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) + ++AfterAllocaInsPt; + + for (Function::arg_iterator + AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { + Type *Ty = AI->getType(); + + // Aggregate types can't be cast, but are legal argument types, so we have + // to handle them differently. We use an extract/insert pair as a + // lightweight method to achieve the same goal. + if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { + Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt); + Instruction *NI = InsertValueInst::Create(AI, EI, 0); + NI->insertAfter(EI); + AI->replaceAllUsesWith(NI); + + // Set the operand of the instructions back to the AllocaInst. 
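+      // (replaceAllUsesWith above also rewired EI and NI themselves to use
+      // NI; point them back at the original incoming value.)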
+ EI->setOperand(0, AI); + NI->setOperand(0, AI); + } else { + // This is always a no-op cast because we're casting AI to AI->getType() + // so src and destination types are identical. BitCast is the only + // possibility. + CastInst *NC = + new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", + AfterAllocaInsPt); + AI->replaceAllUsesWith(NC); + + // Set the operand of the cast instruction back to the AllocaInst. + // Normally it's forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, we're + // replacing it here with the same value it was constructed with. We do + // this because the above replaceAllUsesWith() clobbered the operand, but + // we want this one to remain. + NC->setOperand(0, AI); + } + } +} + +/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind +/// edge and spill them. +void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, + ArrayRef<InvokeInst*> Invokes) { + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator + BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { + for (BasicBlock::iterator + II = BB->begin(), IIE = BB->end(); II != IIE; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. + Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast<Instruction>(Inst->use_back())->getParent() == BB && + !isa<PHINode>(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) + if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + SmallVector<Instruction*, 16> Users; + for (Value::use_iterator + UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User->getParent() != BB || isa<PHINode>(User)) + Users.push_back(User); + } + + // Find all of the blocks that this value is live in. + SmallPtrSet<BasicBlock*, 64> LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa<PHINode>(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast<PHINode>(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " + << UnwindBlock->getName() << "\n"); + NeedsSpill = true; + break; + } + } + + // If we decided we need a spill, do it. + // FIXME: Spilling this way is overkill, as it forces all uses of + // the value to be reloaded from the stack slot, even those that aren't + // in the unwind blocks. We should be more selective. + if (NeedsSpill) { + DemoteRegToStack(*Inst, true); + ++NumSpilled; + } + } + } + + // Go through the landing pads and remove any PHIs there. 
+  for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+    BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+    LandingPadInst *LPI = UnwindBlock->getLandingPadInst();
+
+    // Place PHIs into a set to avoid invalidating the iterator.
+    SmallPtrSet<PHINode*, 8> PHIsToDemote;
+    for (BasicBlock::iterator
+           PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
+      PHIsToDemote.insert(cast<PHINode>(PN));
+    if (PHIsToDemote.empty()) continue;
+
+    // Demote the PHIs to the stack.
+    for (SmallPtrSet<PHINode*, 8>::iterator
+           I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I)
+      DemotePHIToStack(*I);
+
+    // Move the landingpad instruction back to the top of the landing pad block.
+    LPI->moveBefore(UnwindBlock->begin());
+  }
+}
+
+/// setupEntryBlockAndCallSites - Set up the entry block by creating and filling
+/// the function context and marking the call sites with the appropriate
+/// values. These values are used by the DWARF EH emitter.
+bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
+  SmallVector<ReturnInst*, 16> Returns;
+  SmallVector<InvokeInst*, 16> Invokes;
+  SmallSetVector<LandingPadInst*, 16> LPads;
+
+  // Look through the terminators of the basic blocks to find invokes.
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+      if (Function *Callee = II->getCalledFunction())
+        if (Callee->isIntrinsic() &&
+            Callee->getIntrinsicID() == Intrinsic::donothing) {
+          // Remove the NOP invoke.
+          BranchInst::Create(II->getNormalDest(), II);
+          II->eraseFromParent();
+          continue;
+        }
+
+      Invokes.push_back(II);
+      LPads.insert(II->getUnwindDest()->getLandingPadInst());
+    } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+      Returns.push_back(RI);
+    }
+
+  if (Invokes.empty()) return false;
+
+  NumInvokes += Invokes.size();
+
+  lowerIncomingArguments(F);
+  lowerAcrossUnwindEdges(F, Invokes);
+
+  Value *FuncCtx =
+    setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
+  BasicBlock *EntryBB = F.begin();
+  IRBuilder<> Builder(EntryBB->getTerminator());
+
+  // Get a reference to the jump buffer.
+  Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep");
+
+  // Save the frame pointer.
+  Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep");
+
+  Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
+  Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);
+
+  // Save the stack pointer.
+  Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep");
+
+  Val = Builder.CreateCall(StackAddrFn, "sp");
+  Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
+
+  // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+  Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy());
+  Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg);
+
+  // Store a pointer to the function context so that the back-end will know
+  // where to look for it.
+  Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
+  Builder.CreateCall(FuncCtxFn, FuncCtxArg);
+
+  // At this point, we are all set up; update the invoke instructions to mark
+  // their call_site values.
+  for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
+    insertCallSiteStore(Invokes[I], I + 1);
+
+    ConstantInt *CallSiteNum =
+      ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
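An illustrative aside: the GEP indices used above (5 for __jbuf, then 0 for the frame pointer slot and 2 for the stack pointer slot) address the struct assembled in doInitialization. A C-style sketch of that layout, with field names taken from the comments in this file; the runtime's own definition is the authoritative one:

// Mirrors FunctionContextTy for a target where int is 32 bits wide.
struct FunctionContext {
  void *prev;        // __prev: chain of registered contexts
  int   call_site;   // call_site: active call-site number, or -1
  int   data[4];     // __data: exception ptr in [0], selector in [1]
  void *personality; // __personality: personality function
  void *lsda;        // __lsda: language-specific data area
  void *jbuf[5];     // __jbuf: [0] frame ptr, [2] stack ptr, setjmp state
};

+
+    // Record the call site value for the back end so it stays associated with
+    // the invoke.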
+ CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]); + } + + // Mark call instructions that aren't nounwind as no-action (call_site == + // -1). Skip the entry block, as prior to then, no function context has been + // created for this function and any unexpected exceptions thrown will go + // directly to the caller's context, which is what we want anyway, so no need + // to do anything here. + for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) + for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (!CI->doesNotThrow()) + insertCallSiteStore(CI, -1); + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { + insertCallSiteStore(RI, -1); + } + + // Register the function context and make sure it's known to not throw + CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "", + EntryBB->getTerminator()); + Register->setDoesNotThrow(); + + // Following any allocas not in the entry block, update the saved SP in the + // jmpbuf to the new value. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (BB == F.begin()) + continue; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (CI->getCalledFunction() != StackRestoreFn) + continue; + } else if (!isa<AllocaInst>(I)) { + continue; + } + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(I); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); + } + } + + // Finally, for any returns from this function, if this function contains an + // invoke, add a call to unregister the function context. + for (unsigned I = 0, E = Returns.size(); I != E; ++I) + CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]); + + return true; +} + +bool SjLjEHPrepare::runOnFunction(Function &F) { + bool Res = setupEntryBlockAndCallSites(F); + return Res; +} diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp new file mode 100644 index 0000000..20049a8 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -0,0 +1,250 @@ +//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "slotindexes" + +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +char SlotIndexes::ID = 0; +INITIALIZE_PASS(SlotIndexes, "slotindexes", + "Slot index numbering", false, false) + +STATISTIC(NumLocalRenum, "Number of local renumberings"); +STATISTIC(NumGlobalRenum, "Number of global renumberings"); + +void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const { + au.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(au); +} + +void SlotIndexes::releaseMemory() { + mi2iMap.clear(); + MBBRanges.clear(); + idx2MBBMap.clear(); + indexList.clear(); + ileAllocator.Reset(); +} + +bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { + + // Compute numbering as follows: + // Grab an iterator to the start of the index list. 
+  // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+  // iterator in lock-step (though skipping it over indexes which have
+  // null pointers in the instruction field).
+  // At each iteration assert that the instruction pointed to in the index
+  // is the same one pointed to by the MI iterator. This keeps the index
+  // list and the instruction stream in sync.
+
+  // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+  // only need to be set up once after the first numbering is computed.
+
+  mf = &fn;
+
+  // Check that the list contains only the sentinel.
+  assert(indexList.empty() && "Index list non-empty at initial numbering?");
+  assert(idx2MBBMap.empty() &&
+         "Index -> MBB mapping non-empty at initial numbering?");
+  assert(MBBRanges.empty() &&
+         "MBB -> Index mapping non-empty at initial numbering?");
+  assert(mi2iMap.empty() &&
+         "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+  unsigned index = 0;
+  MBBRanges.resize(mf->getNumBlockIDs());
+  idx2MBBMap.reserve(mf->size());
+
+  indexList.push_back(createEntry(0, index));
+
+  // Iterate over the function.
+  for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
+       mbbItr != mbbEnd; ++mbbItr) {
+    MachineBasicBlock *mbb = &*mbbItr;
+
+    // Insert an index for the MBB start.
+    SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
+
+    for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
+         miItr != miEnd; ++miItr) {
+      MachineInstr *mi = miItr;
+      if (mi->isDebugValue())
+        continue;
+
+      // Insert a store index for the instr.
+      indexList.push_back(createEntry(mi, index += SlotIndex::InstrDist));
+
+      // Save this base index in the maps.
+      mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(),
+                                                  SlotIndex::Slot_Block)));
+    }
+
+    // We insert one blank instruction between basic blocks.
+    indexList.push_back(createEntry(0, index += SlotIndex::InstrDist));
+
+    MBBRanges[mbb->getNumber()].first = blockStartIndex;
+    MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(),
+                                                   SlotIndex::Slot_Block);
+    idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
+  }
+
+  // Sort the Idx2MBBMap
+  std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+
+  DEBUG(mf->print(dbgs(), this));
+
+  // And we're done!
+  return false;
+}
+
+void SlotIndexes::renumberIndexes() {
+  // Renumber updates the index of every element of the index list.
+  DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
+  ++NumGlobalRenum;
+
+  unsigned index = 0;
+
+  for (IndexList::iterator I = indexList.begin(), E = indexList.end();
+       I != E; ++I) {
+    I->setIndex(index);
+    index += SlotIndex::InstrDist;
+  }
+}
+
+// Renumber indexes locally after curItr was inserted, but failed to get a new
+// index.
+void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
+  // Number indexes with half the default spacing so we can catch up quickly.
+  const unsigned Space = SlotIndex::InstrDist/2;
+  assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+
+  IndexList::iterator startItr = prior(curItr);
+  unsigned index = startItr->getIndex();
+  do {
+    curItr->setIndex(index += Space);
+    ++curItr;
+    // If the next index is bigger, we have caught up.
+  } while (curItr != indexList.end() && curItr->getIndex() <= index);
+
+  DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-'
+               << index << " ***\n");
+  ++NumLocalRenum;
+}
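A standalone sketch of the local catch-up renumbering above, using a plain std::list<unsigned> in place of the index list (InstrDist is 16 here purely for illustration): inserting between 0 and 16 numbers the new entry 8, and the loop stops as soon as the next old index is already bigger.

#include <cstdio>
#include <list>

static const unsigned InstrDist = 16; // illustrative spacing only

// Renumber forward from the freshly inserted entry at CurItr with half the
// normal spacing, until the old numbering catches up.
static void renumberFrom(std::list<unsigned> &Indexes,
                         std::list<unsigned>::iterator CurItr) {
  const unsigned Space = InstrDist / 2;
  std::list<unsigned>::iterator StartItr = CurItr;
  --StartItr;
  unsigned Index = *StartItr;
  do {
    *CurItr = Index += Space;
    ++CurItr;
    // If the next index is bigger, we have caught up.
  } while (CurItr != Indexes.end() && *CurItr <= Index);
}

int main() {
  const unsigned Init[] = { 0, 16, 32, 48 };
  std::list<unsigned> Indexes(Init, Init + 4);
  std::list<unsigned>::iterator I = Indexes.begin();
  ++I;                      // between 0 and 16
  I = Indexes.insert(I, 0); // new entry with no number of its own yet
  renumberFrom(Indexes, I);
  for (I = Indexes.begin(); I != Indexes.end(); ++I)
    std::printf("%u ", *I); // prints: 0 8 16 32 48
  return 0;
}

+
+// Repair indexes after adding and removing instructions.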
+void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End) { + // FIXME: Is this really necessary? The only caller repairIntervalsForRange() + // does the same thing. + // Find anchor points, which are at the beginning/end of blocks or at + // instructions that already have indexes. + while (Begin != MBB->begin() && !hasIndex(Begin)) + --Begin; + while (End != MBB->end() && !hasIndex(End)) + ++End; + + bool includeStart = (Begin == MBB->begin()); + SlotIndex startIdx; + if (includeStart) + startIdx = getMBBStartIdx(MBB); + else + startIdx = getInstructionIndex(Begin); + + SlotIndex endIdx; + if (End == MBB->end()) + endIdx = getMBBEndIdx(MBB); + else + endIdx = getInstructionIndex(End); + + // FIXME: Conceptually, this code is implementing an iterator on MBB that + // optionally includes an additional position prior to MBB->begin(), indicated + // by the includeStart flag. This is done so that we can iterate MIs in a MBB + // in parallel with SlotIndexes, but there should be a better way to do this. + IndexList::iterator ListB = startIdx.listEntry(); + IndexList::iterator ListI = endIdx.listEntry(); + MachineBasicBlock::iterator MBBI = End; + bool pastStart = false; + while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) { + assert(ListI->getIndex() >= startIdx.getIndex() && + (includeStart || !pastStart) && + "Decremented past the beginning of region to repair."); + + MachineInstr *SlotMI = ListI->getInstr(); + MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0; + bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart); + + if (SlotMI == MI && !MBBIAtBegin) { + --ListI; + if (MBBI != Begin) + --MBBI; + else + pastStart = true; + } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) { + if (MBBI != Begin) + --MBBI; + else + pastStart = true; + } else { + --ListI; + if (SlotMI) + removeMachineInstrFromMaps(SlotMI); + } + } + + // In theory this could be combined with the previous loop, but it is tricky + // to update the IndexList while we are iterating it. + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end()) + insertMachineInstrInMaps(MI); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void SlotIndexes::dump() const { + for (IndexList::const_iterator itr = indexList.begin(); + itr != indexList.end(); ++itr) { + dbgs() << itr->getIndex() << " "; + + if (itr->getInstr() != 0) { + dbgs() << *itr->getInstr(); + } else { + dbgs() << "\n"; + } + } + + for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i) + dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';' + << MBBRanges[i].second << ")\n"; +} +#endif + +// Print a SlotIndex to a raw_ostream. +void SlotIndex::print(raw_ostream &os) const { + if (isValid()) + os << listEntry()->getIndex() << "Berd"[getSlot()]; + else + os << "invalid"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +// Dump a SlotIndex to stderr. 
+void SlotIndex::dump() const { + print(dbgs()); + dbgs() << "\n"; +} +#endif + diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp new file mode 100644 index 0000000..c5bbba3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -0,0 +1,381 @@ +//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the spill code placement analysis. +// +// Each edge bundle corresponds to a node in a Hopfield network. Constraints on +// basic blocks are weighted by the block frequency and added to become the node +// bias. +// +// Transparent basic blocks have the variable live through, but don't care if it +// is spilled or in a register. These blocks become connections in the Hopfield +// network, again weighted by block frequency. +// +// The Hopfield network minimizes (possibly locally) its energy function: +// +// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b ) +// +// The energy function represents the expected spill code execution frequency, +// or the cost of spilling. This is a Lyapunov function which never increases +// when a node is updated. It is guaranteed to converge to a local minimum. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "spillplacement" +#include "SpillPlacement.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" + +using namespace llvm; + +char SpillPlacement::ID = 0; +INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement", + "Spill Code Placement Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(EdgeBundles) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement", + "Spill Code Placement Analysis", true, true) + +char &llvm::SpillPlacementID = SpillPlacement::ID; + +void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<EdgeBundles>(); + AU.addRequiredTransitive<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Node - Each edge bundle corresponds to a Hopfield node. +/// +/// The node contains precomputed frequency data that only depends on the CFG, +/// but Bias and Links are computed each time placeSpills is called. +/// +/// The node Value is positive when the variable should be in a register. The +/// value can change when linked nodes change, but convergence is very fast +/// because all weights are positive. +/// +struct SpillPlacement::Node { + /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle. + /// Ideally, these two numbers should be identical, but inaccuracies in the + /// block frequency estimates means that we need to normalize ingoing and + /// outgoing frequencies separately so they are commensurate. + float Scale[2]; + + /// Bias - Normalized contributions from non-transparent blocks. 
+  /// A bundle connected to a MustSpill block has a huge negative bias,
+  /// otherwise it is a number in the range [-2;2].
+  float Bias;
+
+  /// Value - Output value of this node computed from the Bias and links.
+  /// This is always in the range [-1;1]. A positive number means the variable
+  /// should go in a register through this bundle.
+  float Value;
+
+  typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+
+  /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+  /// bundles. The weights are all positive and add up to at most 2, weights
+  /// from ingoing and outgoing nodes separately add up to at most 1. The weight
+  /// sum can be less than 2 when the variable is not live into / out of some
+  /// connected basic blocks.
+  LinkVector Links;
+
+  /// preferReg - Return true when this node prefers to be in a register.
+  bool preferReg() const {
+    // Undecided nodes (Value==0) go on the stack.
+    return Value > 0;
+  }
+
+  /// mustSpill - Return True if this node is so biased that it must spill.
+  bool mustSpill() const {
+    // Actually, we must spill if Bias < sum(weights).
+    // It may be worth it to compute the weight sum here?
+    return Bias < -2.0f;
+  }
+
+  /// Node - Create a blank Node.
+  Node() {
+    Scale[0] = Scale[1] = 0;
+  }
+
+  /// clear - Reset per-query data, but preserve frequencies that only depend
+  /// on the CFG.
+  void clear() {
+    Bias = Value = 0;
+    Links.clear();
+  }
+
+  /// addLink - Add a link to bundle b with weight w.
+  /// out=0 for an ingoing link, and 1 for an outgoing link.
+  void addLink(unsigned b, float w, bool out) {
+    // Normalize w relative to all connected blocks from that direction.
+    w *= Scale[out];
+
+    // There can be multiple links to the same bundle, add them up.
+    for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+      if (I->second == b) {
+        I->first += w;
+        return;
+      }
+    // This must be the first link to b.
+    Links.push_back(std::make_pair(w, b));
+  }
+
+  /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+  void addBias(float w, bool out) {
+    // Normalize w relative to all connected blocks from that direction.
+    w *= Scale[out];
+    Bias += w;
+  }
+
+  /// update - Recompute Value from Bias and Links. Return true when node
+  /// preference changes.
+  bool update(const Node nodes[]) {
+    // Compute the weighted sum of inputs.
+    float Sum = Bias;
+    for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+      Sum += I->first * nodes[I->second].Value;
+
+    // The weighted sum is going to be in the range [-2;2]. Ideally, we should
+    // simply set Value = sign(Sum), but we will add a dead zone around 0 for
+    // two reasons:
+    //  1. It avoids arbitrary bias when all links are 0 as is possible during
+    //     initial iterations.
+    //  2. It helps tame rounding errors when the links nominally sum to 0.
+    const float Thres = 1e-4f;
+    bool Before = preferReg();
+    if (Sum < -Thres)
+      Value = -1;
+    else if (Sum > Thres)
+      Value = 1;
+    else
+      Value = 0;
+    return Before != preferReg();
+  }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  bundles = &getAnalysis<EdgeBundles>();
+  loops = &getAnalysis<MachineLoopInfo>();
+
+  assert(!nodes && "Leaking node array");
+  nodes = new Node[bundles->getNumBundles()];
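A self-contained toy version of the update rule above: each node takes the sign of its bias plus the weighted values of its neighbours, with the same dead zone around zero. Here two mutually linked nodes, one with a strong register preference and one with a weak spill preference, settle with both in a register; all weights and biases are invented for the example.

#include <cstdio>
#include <utility>
#include <vector>

struct ToyNode {
  float Bias, Value;
  std::vector<std::pair<float, unsigned> > Links; // (weight, node index)

  // Recompute Value; return true when it changed.
  bool update(const std::vector<ToyNode> &Nodes) {
    float Sum = Bias;
    for (unsigned i = 0; i != Links.size(); ++i)
      Sum += Links[i].first * Nodes[Links[i].second].Value;
    const float Thres = 1e-4f; // dead zone, as in Node::update above
    float Old = Value;
    Value = Sum < -Thres ? -1.0f : Sum > Thres ? 1.0f : 0.0f;
    return Value != Old;
  }
};

int main() {
  std::vector<ToyNode> N(2);
  N[0].Bias = 0.5f;  // strongly prefers a register
  N[1].Bias = -0.1f; // weakly prefers a spill
  N[0].Value = N[1].Value = 0;
  N[0].Links.push_back(std::make_pair(0.8f, 1u));
  N[1].Links.push_back(std::make_pair(0.8f, 0u));

  // Sweep until stable; the strong node drags its neighbour positive.
  for (bool Changed = true; Changed;) {
    Changed = false;
    for (unsigned i = 0; i != N.size(); ++i)
      Changed |= N[i].update(N);
  }
  std::printf("%g %g\n", N[0].Value, N[1].Value); // prints: 1 1
  return 0;
}

+
+  // Compute total ingoing and outgoing block frequencies for all bundles.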
+ BlockFrequency.resize(mf.getNumBlockIDs()); + for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { + float Freq = LiveIntervals::getSpillWeight(true, false, + loops->getLoopDepth(I)); + unsigned Num = I->getNumber(); + BlockFrequency[Num] = Freq; + nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq; + nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq; + } + + // Scales are reciprocal frequencies. + for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i) + for (unsigned d = 0; d != 2; ++d) + if (nodes[i].Scale[d] > 0) + nodes[i].Scale[d] = 1 / nodes[i].Scale[d]; + + // We never change the function. + return false; +} + +void SpillPlacement::releaseMemory() { + delete[] nodes; + nodes = 0; +} + +/// activate - mark node n as active if it wasn't already. +void SpillPlacement::activate(unsigned n) { + if (ActiveNodes->test(n)) + return; + ActiveNodes->set(n); + nodes[n].clear(); + + // Very large bundles usually come from big switches, indirect branches, + // landing pads, or loops with many 'continue' statements. It is difficult to + // allocate registers when so many different blocks are involved. + // + // Give a small negative bias to large bundles such that 1/32 of the + // connected blocks need to be interested before we consider expanding the + // region through the bundle. This helps compile time by limiting the number + // of blocks visited and the number of links in the Hopfield network. + if (bundles->getBlocks(n).size() > 100) + nodes[n].Bias = -0.0625f; +} + + +/// addConstraints - Compute node biases and weights from a set of constraints. +/// Set a bit in NodeMask for each active node. +void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { + for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(), + E = LiveBlocks.end(); I != E; ++I) { + float Freq = getBlockFrequency(I->Number); + const float Bias[] = { + 0, // DontCare, + 1, // PrefReg, + -1, // PrefSpill + 0, // PrefBoth + -HUGE_VALF // MustSpill + }; + + // Live-in to block? + if (I->Entry != DontCare) { + unsigned ib = bundles->getBundle(I->Number, 0); + activate(ib); + nodes[ib].addBias(Freq * Bias[I->Entry], 1); + } + + // Live-out from block? + if (I->Exit != DontCare) { + unsigned ob = bundles->getBundle(I->Number, 1); + activate(ob); + nodes[ob].addBias(Freq * Bias[I->Exit], 0); + } + } +} + +/// addPrefSpill - Same as addConstraints(PrefSpill) +void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) { + for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + float Freq = getBlockFrequency(*I); + if (Strong) + Freq += Freq; + unsigned ib = bundles->getBundle(*I, 0); + unsigned ob = bundles->getBundle(*I, 1); + activate(ib); + activate(ob); + nodes[ib].addBias(-Freq, 1); + nodes[ob].addBias(-Freq, 0); + } +} + +void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { + for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E; + ++I) { + unsigned Number = *I; + unsigned ib = bundles->getBundle(Number, 0); + unsigned ob = bundles->getBundle(Number, 1); + + // Ignore self-loops. 
+    if (ib == ob)
+      continue;
+    activate(ib);
+    activate(ob);
+    if (nodes[ib].Links.empty() && !nodes[ib].mustSpill())
+      Linked.push_back(ib);
+    if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
+      Linked.push_back(ob);
+    float Freq = getBlockFrequency(Number);
+    nodes[ib].addLink(ob, Freq, 1);
+    nodes[ob].addLink(ib, Freq, 0);
+  }
+}
+
+bool SpillPlacement::scanActiveBundles() {
+  Linked.clear();
+  RecentPositive.clear();
+  for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+    nodes[n].update(nodes);
+    // A node that must spill, or a node without any links is not going to
+    // change its value ever again, so exclude it from iterations.
+    if (nodes[n].mustSpill())
+      continue;
+    if (!nodes[n].Links.empty())
+      Linked.push_back(n);
+    if (nodes[n].preferReg())
+      RecentPositive.push_back(n);
+  }
+  return !RecentPositive.empty();
+}
+
+/// iterate - Repeatedly update the Hopfield nodes on the Linked list until
+/// stability or the maximum number of iterations is reached.
+void SpillPlacement::iterate() {
+  // First update the recently positive nodes. They have likely received new
+  // negative bias that will turn them off.
+  while (!RecentPositive.empty())
+    nodes[RecentPositive.pop_back_val()].update(nodes);
+
+  if (Linked.empty())
+    return;
+
+  // Run up to 10 iterations. The edge bundle numbering is closely related to
+  // basic block numbering, so there is a strong tendency towards chains of
+  // linked nodes with sequential numbers. By scanning the linked nodes
+  // backwards and forwards, we make it very likely that a single node can
+  // affect the entire network in a single iteration. That means very fast
+  // convergence, usually in a single iteration.
+  for (unsigned iteration = 0; iteration != 10; ++iteration) {
+    // Scan backwards, skipping the last node which was just updated.
+    bool Changed = false;
+    for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
+           llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+      unsigned n = *I;
+      if (nodes[n].update(nodes)) {
+        Changed = true;
+        if (nodes[n].preferReg())
+          RecentPositive.push_back(n);
+      }
+    }
+    if (!Changed || !RecentPositive.empty())
+      return;
+
+    // Scan forwards, skipping the first node which was just updated.
+    Changed = false;
+    for (SmallVectorImpl<unsigned>::const_iterator I =
+           llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+      unsigned n = *I;
+      if (nodes[n].update(nodes)) {
+        Changed = true;
+        if (nodes[n].preferReg())
+          RecentPositive.push_back(n);
+      }
+    }
+    if (!Changed || !RecentPositive.empty())
+      return;
+  }
+}
+
+void SpillPlacement::prepare(BitVector &RegBundles) {
+  Linked.clear();
+  RecentPositive.clear();
+  // Reuse RegBundles as our ActiveNodes vector.
+  ActiveNodes = &RegBundles;
+  ActiveNodes->clear();
+  ActiveNodes->resize(bundles->getNumBundles());
+}
+
+bool
+SpillPlacement::finish() {
+  assert(ActiveNodes && "Call prepare() first");
+
+  // Write preferences back to ActiveNodes.
+ bool Perfect = true; + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) + if (!nodes[n].preferReg()) { + ActiveNodes->reset(n); + Perfect = false; + } + ActiveNodes = 0; + return Perfect; +} diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h new file mode 100644 index 0000000..fc412f8 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h @@ -0,0 +1,156 @@ +//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This analysis computes the optimal spill code placement between basic blocks. +// +// The runOnMachineFunction() method only precomputes some profiling information +// about the CFG. The real work is done by prepare(), addConstraints(), and +// finish() which are called by the register allocator. +// +// Given a variable that is live across multiple basic blocks, and given +// constraints on the basic blocks where the variable is live, determine which +// edge bundles should have the variable in a register and which edge bundles +// should have the variable in a stack slot. +// +// The returned bit vector can be used to place optimal spill code at basic +// block entries and exits. Spill code placement inside a basic block is not +// considered. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H +#define LLVM_CODEGEN_SPILLPLACEMENT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class BitVector; +class EdgeBundles; +class MachineBasicBlock; +class MachineLoopInfo; + +class SpillPlacement : public MachineFunctionPass { + struct Node; + const MachineFunction *MF; + const EdgeBundles *bundles; + const MachineLoopInfo *loops; + Node *nodes; + + // Nodes that are active in the current computation. Owned by the prepare() + // caller. + BitVector *ActiveNodes; + + // Nodes with active links. Populated by scanActiveBundles. + SmallVector<unsigned, 8> Linked; + + // Nodes that went positive during the last call to scanActiveBundles or + // iterate. + SmallVector<unsigned, 8> RecentPositive; + + // Block frequencies are computed once. Indexed by block number. + SmallVector<float, 4> BlockFrequency; + +public: + static char ID; // Pass identification, replacement for typeid. + + SpillPlacement() : MachineFunctionPass(ID), nodes(0) {} + ~SpillPlacement() { releaseMemory(); } + + /// BorderConstraint - A basic block has separate constraints for entry and + /// exit. + enum BorderConstraint { + DontCare, ///< Block doesn't care / variable not live. + PrefReg, ///< Block entry/exit prefers a register. + PrefSpill, ///< Block entry/exit prefers a stack slot. + PrefBoth, ///< Block entry prefers both register and stack. + MustSpill ///< A register is impossible, variable must be spilled. + }; + + /// BlockConstraint - Entry and exit constraints for a basic block. + struct BlockConstraint { + unsigned Number; ///< Basic block number (from MBB::getNumber()). + BorderConstraint Entry : 8; ///< Constraint on block entry. + BorderConstraint Exit : 8; ///< Constraint on block exit. + + /// True when this block changes the value of the live range. 
This means
+    /// the block has a non-PHI def. When this is false, a live-in value on
+    /// the stack can be live-out on the stack without inserting a spill.
+    bool ChangesValue;
+  };
+
+  /// prepare - Reset state and prepare for a new spill placement computation.
+  /// @param RegBundles Bit vector to receive the edge bundles where the
+  ///                   variable should be kept in a register. Each bit
+  ///                   corresponds to an edge bundle, a set bit means the
+  ///                   variable should be kept in a register through the
+  ///                   bundle. A clear bit means the variable should be
+  ///                   spilled. This vector is retained.
+  void prepare(BitVector &RegBundles);
+
+  /// addConstraints - Add constraints and biases. This method may be called
+  /// more than once to accumulate constraints.
+  /// @param LiveBlocks Constraints for blocks that have the variable live in or
+  ///                   live out.
+  void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+
+  /// addPrefSpill - Add PrefSpill constraints to all blocks listed. This is
+  /// equivalent to calling addConstraints with identical BlockConstraints with
+  /// Entry = Exit = PrefSpill, and ChangesValue = false.
+  ///
+  /// @param Blocks Array of block numbers that prefer to spill in and out.
+  /// @param Strong When true, double the negative bias for these blocks.
+  void addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong);
+
+  /// addLinks - Add transparent blocks with the given numbers.
+  void addLinks(ArrayRef<unsigned> Links);
+
+  /// scanActiveBundles - Perform an initial scan of all bundles activated by
+  /// addConstraints and addLinks, updating their state. Add all the bundles
+  /// that now prefer a register to RecentPositive.
+  /// Prepare internal data structures for iterate.
+  /// Return true if there are any positive nodes.
+  bool scanActiveBundles();
+
+  /// iterate - Update the network iteratively until convergence, or new bundles
+  /// are found.
+  void iterate();
+
+  /// getRecentPositive - Return an array of bundles that became positive during
+  /// the previous call to scanActiveBundles or iterate.
+  ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
+
+  /// finish - Compute the optimal spill code placement given the
+  /// constraints. No MustSpill constraints will be violated, and the smallest
+  /// possible number of PrefX constraints will be violated, weighted by
+  /// expected execution frequencies.
+  /// The selected bundles are returned in the bitvector passed to prepare().
+  /// @return True if a perfect solution was found, allowing the variable to be
+  ///         in a register through all relevant bundles.
+  bool finish();
+
+  /// getBlockFrequency - Return the estimated block execution frequency per
+  /// function invocation.
+  float getBlockFrequency(unsigned Number) const {
+    return BlockFrequency[Number];
+  }
+
+private:
+  virtual bool runOnMachineFunction(MachineFunction&);
+  virtual void getAnalysisUsage(AnalysisUsage&) const;
+  virtual void releaseMemory();
+
+  void activate(unsigned);
+};
+
+} // end namespace llvm
+
+#endif
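Taken together, the interface above suggests the following driving sequence. This fragment is a sketch of how a register allocator might use the analysis, assuming a SpillPlacement reference SP and precomputed LiveBlocks/TransparentBlocks arrays (all names here are placeholders, not code from an actual allocator):

BitVector RegBundles;
SP.prepare(RegBundles);          // reset state; RegBundles is retained
SP.addConstraints(LiveBlocks);   // per-block entry/exit preferences
SP.addLinks(TransparentBlocks);  // blocks the variable merely lives through
if (SP.scanActiveBundles())      // initial scan seeds RecentPositive
  SP.iterate();                  // settle the Hopfield network
bool Perfect = SP.finish();      // selected bundles are now in RegBundles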
diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp new file mode 100644 index 0000000..209792f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Spiller.cpp @@ -0,0 +1,194 @@ +//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+
+#include "Spiller.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+  enum SpillerName { trivial, inline_ };
+}
+
+static cl::opt<SpillerName>
+spillerOpt("spiller",
+           cl::desc("Spiller to use: (default: trivial)"),
+           cl::Prefix,
+           cl::values(clEnumVal(trivial,   "trivial spiller"),
+                      clEnumValN(inline_,  "inline", "inline spiller"),
+                      clEnumValEnd),
+           cl::init(trivial));
+
+// Spiller virtual destructor implementation.
+Spiller::~Spiller() {}
+
+namespace {
+
+/// Utility class for spillers.
+class SpillerBase : public Spiller {
+protected:
+  MachineFunctionPass *pass;
+  MachineFunction *mf;
+  VirtRegMap *vrm;
+  LiveIntervals *lis;
+  MachineFrameInfo *mfi;
+  MachineRegisterInfo *mri;
+  const TargetInstrInfo *tii;
+  const TargetRegisterInfo *tri;
+
+  /// Construct a spiller base.
+  SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+    : pass(&pass), mf(&mf), vrm(&vrm)
+  {
+    lis = &pass.getAnalysis<LiveIntervals>();
+    mfi = mf.getFrameInfo();
+    mri = &mf.getRegInfo();
+    tii = mf.getTarget().getInstrInfo();
+    tri = mf.getTarget().getRegisterInfo();
+  }
+
+  /// Add spill ranges for every use/def of the live interval, inserting loads
+  /// immediately before each use, and stores after each def. No folding or
+  /// remat is attempted.
+  void trivialSpillEverywhere(LiveRangeEdit& LRE) {
+    LiveInterval* li = &LRE.getParent();
+
+    DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
+
+    assert(li->weight != HUGE_VALF &&
+           "Attempting to spill already spilled value.");
+
+    assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
+           "Trying to spill a stack slot.");
+
+    DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
+
+    const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+    unsigned ss = vrm->assignVirt2StackSlot(li->reg);
+
+    // Iterate over reg uses/defs.
+    for (MachineRegisterInfo::reg_iterator
+         regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
+
+      // Grab the use/def instr.
+      MachineInstr *mi = &*regItr;
+
+      DEBUG(dbgs() << "  Processing " << *mi);
+
+      // Step regItr to the next use/def instr.
+      do {
+        ++regItr;
+      } while (regItr != mri->reg_end() && (&*regItr == mi));
+
+      // Collect uses & defs for this instr.
+      SmallVector<unsigned, 2> indices;
+      bool hasUse = false;
+      bool hasDef = false;
+      for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
+        MachineOperand &op = mi->getOperand(i);
+        if (!op.isReg() || op.getReg() != li->reg)
+          continue;
+        hasUse |= mi->getOperand(i).isUse();
+        hasDef |= mi->getOperand(i).isDef();
+        indices.push_back(i);
+      }
+
+      // Create a new vreg & interval for this instr.
+      LiveInterval *newLI = &LRE.create();
+      newLI->weight = HUGE_VALF;
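A toy model of the placement policy trivialSpillEverywhere implements: a reload goes immediately before every instruction that reads the spilled register, and a store immediately after every instruction that writes it. The Inst type and the instruction stream below are invented for the example:

#include <cstdio>
#include <vector>

struct Inst {
  const char *Name;
  bool ReadsReg;  // uses the spilled vreg
  bool WritesReg; // defines the spilled vreg
};

// Print the instruction stream with reloads before uses and stores after
// defs, mirroring the spill-everywhere policy above.
static void spillEverywhere(const std::vector<Inst> &Code) {
  for (unsigned i = 0; i != Code.size(); ++i) {
    if (Code[i].ReadsReg)
      std::printf("  reload vreg from stack slot\n");
    std::printf("%s\n", Code[i].Name);
    if (Code[i].WritesReg)
      std::printf("  store vreg to stack slot\n");
  }
}

int main() {
  Inst Code[] = {
    { "def = ...",       false, true  },
    { "... = use(def)",  true,  false },
  };
  spillEverywhere(std::vector<Inst>(Code, Code + 2));
  return 0;
}

+
+      // Update the reg operands & kill flags.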
+      for (unsigned i = 0; i < indices.size(); ++i) {
+        unsigned mopIdx = indices[i];
+        MachineOperand &mop = mi->getOperand(mopIdx);
+        mop.setReg(newLI->reg);
+        if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
+          mop.setIsKill(true);
+        }
+      }
+      assert(hasUse || hasDef);
+
+      // Insert reload if necessary.
+      MachineBasicBlock::iterator miItr(mi);
+      if (hasUse) {
+        tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc,
+                                  tri);
+        MachineInstr *loadInstr(prior(miItr));
+        SlotIndex loadIndex =
+          lis->InsertMachineInstrInMaps(loadInstr).getRegSlot();
+        SlotIndex endIndex = loadIndex.getNextIndex();
+        VNInfo *loadVNI =
+          newLI->getNextValue(loadIndex, lis->getVNInfoAllocator());
+        newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
+      }
+
+      // Insert store if necessary.
+      if (hasDef) {
+        tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg,
+                                 true, ss, trc, tri);
+        MachineInstr *storeInstr(llvm::next(miItr));
+        SlotIndex storeIndex =
+          lis->InsertMachineInstrInMaps(storeInstr).getRegSlot();
+        SlotIndex beginIndex = storeIndex.getPrevIndex();
+        VNInfo *storeVNI =
+          newLI->getNextValue(beginIndex, lis->getVNInfoAllocator());
+        newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
+      }
+    }
+  }
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Spills any live range using the spill-everywhere method with no attempt at
+/// folding.
+class TrivialSpiller : public SpillerBase {
+public:
+
+  TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+                 VirtRegMap &vrm)
+    : SpillerBase(pass, mf, vrm) {}
+
+  void spill(LiveRangeEdit &LRE) {
+    // Ignore spillIs - we don't use it.
+    trivialSpillEverywhere(LRE);
+  }
+};
+
+} // end anonymous namespace
+
+void Spiller::anchor() { }
+
+llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
+                                   MachineFunction &mf,
+                                   VirtRegMap &vrm) {
+  switch (spillerOpt) {
+  case trivial: return new TrivialSpiller(pass, mf, vrm);
+  case inline_: return createInlineSpiller(pass, mf, vrm);
+  }
+  llvm_unreachable("Invalid spiller optimization");
+} diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h new file mode 100644 index 0000000..b7d5bea --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Spiller.h @@ -0,0 +1,47 @@ +//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLER_H
+#define LLVM_CODEGEN_SPILLER_H
+
+namespace llvm {
+
+  class LiveRangeEdit;
+  class MachineFunction;
+  class MachineFunctionPass;
+  class VirtRegMap;
+
+  /// Spiller interface.
+  ///
+  /// Implementations are utility classes which insert spill or remat code on
+  /// demand.
+  class Spiller {
+    virtual void anchor();
+  public:
+    virtual ~Spiller() = 0;
+
+    /// spill - Spill the LRE.getParent() live interval.
+    virtual void spill(LiveRangeEdit &LRE) = 0;
+
+  };
+
+  /// Create and return a spiller object, as specified on the command line.
+  Spiller* createSpiller(MachineFunctionPass &pass,
+                         MachineFunction &mf,
+                         VirtRegMap &vrm);
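A short usage sketch for the factory declared above. This is a fragment, not code from an actual pass: the surrounding Pass, MF, VRM, and LRE objects are assumed to exist, and the explicit delete is only illustrative of ownership:

Spiller *S = createSpiller(Pass, MF, VRM); // honors the -spiller option
// ... during allocation, for each live range chosen for spilling:
S->spill(LRE);
delete S; // the owning pass is responsible for cleanup

+
+  /// Create and return a spiller that will insert spill code directly instead
+  /// of deferring through VirtRegMap.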
+ Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm); + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp new file mode 100644 index 0000000..0a3818e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -0,0 +1,1432 @@ +//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SplitAnalysis class as well as mutator functions for +// live range splitting. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "SplitKit.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +STATISTIC(NumFinished, "Number of splits finished"); +STATISTIC(NumSimple, "Number of splits that were simple"); +STATISTIC(NumCopies, "Number of copies inserted for splitting"); +STATISTIC(NumRemats, "Number of rematerialized defs for splitting"); +STATISTIC(NumRepairs, "Number of invalid live ranges repaired"); + +//===----------------------------------------------------------------------===// +// Split Analysis +//===----------------------------------------------------------------------===// + +SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, + const LiveIntervals &lis, + const MachineLoopInfo &mli) + : MF(vrm.getMachineFunction()), + VRM(vrm), + LIS(lis), + Loops(mli), + TII(*MF.getTarget().getInstrInfo()), + CurLI(0), + LastSplitPoint(MF.getNumBlockIDs()) {} + +void SplitAnalysis::clear() { + UseSlots.clear(); + UseBlocks.clear(); + ThroughBlocks.clear(); + CurLI = 0; + DidRepairRange = false; +} + +SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { + const MachineBasicBlock *MBB = MF.getBlockNumbered(Num); + const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor(); + std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num]; + SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); + + // Compute split points on the first call. The pair is independent of the + // current live interval. + if (!LSP.first.isValid()) { + MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator(); + if (FirstTerm == MBB->end()) + LSP.first = MBBEnd; + else + LSP.first = LIS.getInstructionIndex(FirstTerm); + + // If there is a landing pad successor, also find the call instruction. + if (!LPad) + return LSP.first; + // There may not be a call instruction (?) in which case we ignore LPad. + LSP.second = LSP.first; + for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin(); + I != E;) { + --I; + if (I->isCall()) { + LSP.second = LIS.getInstructionIndex(I); + break; + } + } + } + + // If CurLI is live into a landing pad successor, move the last split point + // back to the call that may throw. 
+  if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad))
+    return LSP.first;
+
+  // Find the value leaving MBB.
+  const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd);
+  if (!VNI)
+    return LSP.first;
+
+  // If the value leaving MBB was defined after the call in MBB, it can't
+  // really be live-in to the landing pad. This can happen if the landing pad
+  // has a PHI, and this register is undef on the exceptional edge.
+  // <rdar://problem/10664933>
+  if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd)
+    return LSP.first;
+
+  // Value is properly live-in to the landing pad.
+  // Only allow splits before the call.
+  return LSP.second;
+}
+
+MachineBasicBlock::iterator
+SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) {
+  SlotIndex LSP = getLastSplitPoint(MBB->getNumber());
+  if (LSP == LIS.getMBBEndIdx(MBB))
+    return MBB->end();
+  return LIS.getInstructionFromIndex(LSP);
+}
+
+/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
+void SplitAnalysis::analyzeUses() {
+  assert(UseSlots.empty() && "Call clear first");
+
+  // First get all the defs from the interval values. This provides the correct
+  // slots for early clobbers.
+  for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(),
+       E = CurLI->vni_end(); I != E; ++I)
+    if (!(*I)->isPHIDef() && !(*I)->isUnused())
+      UseSlots.push_back((*I)->def);
+
+  // Get use slots from the use-def chain.
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  for (MachineRegisterInfo::use_nodbg_iterator
+       I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
+       ++I)
+    if (!I.getOperand().isUndef())
+      UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot());
+
+  array_pod_sort(UseSlots.begin(), UseSlots.end());
+
+  // Remove duplicates, keeping the smaller slot for each instruction.
+  // That is what we want for early clobbers.
+  UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(),
+                             SlotIndex::isSameInstr),
+                 UseSlots.end());
+
+  // Compute per-live block info.
+  if (!calcLiveBlockInfo()) {
+    // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
+    // I am looking at you, RegisterCoalescer!
+    DidRepairRange = true;
+    ++NumRepairs;
+    DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
+    const_cast<LiveIntervals&>(LIS)
+      .shrinkToUses(const_cast<LiveInterval*>(CurLI));
+    UseBlocks.clear();
+    ThroughBlocks.clear();
+    bool fixed = calcLiveBlockInfo();
+    (void)fixed;
+    assert(fixed && "Couldn't fix broken live interval");
+  }
+
+  DEBUG(dbgs() << "Analyze counted "
+               << UseSlots.size() << " instrs in "
+               << UseBlocks.size() << " blocks, through "
+               << NumThroughBlocks << " blocks.\n");
+}
+
+/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
+/// where CurLI is live.
+bool SplitAnalysis::calcLiveBlockInfo() {
+  ThroughBlocks.resize(MF.getNumBlockIDs());
+  NumThroughBlocks = NumGapBlocks = 0;
+  if (CurLI->empty())
+    return true;
+
+  LiveInterval::const_iterator LVI = CurLI->begin();
+  LiveInterval::const_iterator LVE = CurLI->end();
+
+  SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+  UseI = UseSlots.begin();
+  UseE = UseSlots.end();
+
+  // Loop over basic blocks where CurLI is live.
+  MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+  for (;;) {
+    BlockInfo BI;
+    BI.MBB = MFI;
+    SlotIndex Start, Stop;
+    tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+    // If the block contains no uses, the range must be live through.
At one + // point, RegisterCoalescer could create dangling ranges that ended + // mid-block. + if (UseI == UseE || *UseI >= Stop) { + ++NumThroughBlocks; + ThroughBlocks.set(BI.MBB->getNumber()); + // The range shouldn't end mid-block if there are no uses. This shouldn't + // happen. + if (LVI->end < Stop) + return false; + } else { + // This block has uses. Find the first and last uses in the block. + BI.FirstInstr = *UseI; + assert(BI.FirstInstr >= Start); + do ++UseI; + while (UseI != UseE && *UseI < Stop); + BI.LastInstr = UseI[-1]; + assert(BI.LastInstr < Stop); + + // LVI is the first live segment overlapping MBB. + BI.LiveIn = LVI->start <= Start; + + // When not live in, the first use should be a def. + if (!BI.LiveIn) { + assert(LVI->start == LVI->valno->def && "Dangling LiveRange start"); + assert(LVI->start == BI.FirstInstr && "First instr should be a def"); + BI.FirstDef = BI.FirstInstr; + } + + // Look for gaps in the live range. + BI.LiveOut = true; + while (LVI->end < Stop) { + SlotIndex LastStop = LVI->end; + if (++LVI == LVE || LVI->start >= Stop) { + BI.LiveOut = false; + BI.LastInstr = LastStop; + break; + } + + if (LastStop < LVI->start) { + // There is a gap in the live range. Create duplicate entries for the + // live-in snippet and the live-out snippet. + ++NumGapBlocks; + + // Push the Live-in part. + BI.LiveOut = false; + UseBlocks.push_back(BI); + UseBlocks.back().LastInstr = LastStop; + + // Set up BI for the live-out part. + BI.LiveIn = false; + BI.LiveOut = true; + BI.FirstInstr = BI.FirstDef = LVI->start; + } + + // A LiveRange that starts in the middle of the block must be a def. + assert(LVI->start == LVI->valno->def && "Dangling LiveRange start"); + if (!BI.FirstDef) + BI.FirstDef = LVI->start; + } + + UseBlocks.push_back(BI); + + // LVI is now at LVE or LVI->end >= Stop. + if (LVI == LVE) + break; + } + + // Live segment ends exactly at Stop. Move to the next segment. + if (LVI->end == Stop && ++LVI == LVE) + break; + + // Pick the next basic block. + if (LVI->start < Stop) + ++MFI; + else + MFI = LIS.getMBBFromIndex(LVI->start); + } + + assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count"); + return true; +} + +unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { + if (cli->empty()) + return 0; + LiveInterval *li = const_cast<LiveInterval*>(cli); + LiveInterval::iterator LVI = li->begin(); + LiveInterval::iterator LVE = li->end(); + unsigned Count = 0; + + // Loop over basic blocks where li is live. + MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start); + SlotIndex Stop = LIS.getMBBEndIdx(MFI); + for (;;) { + ++Count; + LVI = li->advanceTo(LVI, Stop); + if (LVI == LVE) + return Count; + do { + ++MFI; + Stop = LIS.getMBBEndIdx(MFI); + } while (Stop <= LVI->start); + } +} + +bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { + unsigned OrigReg = VRM.getOriginal(CurLI->reg); + const LiveInterval &Orig = LIS.getInterval(OrigReg); + assert(!Orig.empty() && "Splitting empty interval?"); + LiveInterval::const_iterator I = Orig.find(Idx); + + // Range containing Idx should begin at Idx. + if (I != Orig.end() && I->start <= Idx) + return I->start == Idx; + + // Range does not contain Idx, previous must end at Idx. 
+ return I != Orig.begin() && (--I)->end == Idx; +} + +void SplitAnalysis::analyze(const LiveInterval *li) { + clear(); + CurLI = li; + analyzeUses(); +} + + +//===----------------------------------------------------------------------===// +// Split Editor +//===----------------------------------------------------------------------===// + +/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. +SplitEditor::SplitEditor(SplitAnalysis &sa, + LiveIntervals &lis, + VirtRegMap &vrm, + MachineDominatorTree &mdt) + : SA(sa), LIS(lis), VRM(vrm), + MRI(vrm.getMachineFunction().getRegInfo()), + MDT(mdt), + TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), + TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + Edit(0), + OpenIdx(0), + SpillMode(SM_Partition), + RegAssign(Allocator) +{} + +void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { + Edit = &LRE; + SpillMode = SM; + OpenIdx = 0; + RegAssign.clear(); + Values.clear(); + + // Reset the LiveRangeCalc instances needed for this spill mode. + LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + if (SpillMode) + LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + + // We don't need an AliasAnalysis since we will only be performing + // cheap-as-a-copy remats anyway. + Edit->anyRematerializable(0); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void SplitEditor::dump() const { + if (RegAssign.empty()) { + dbgs() << " empty\n"; + return; + } + + for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) + dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); + dbgs() << '\n'; +} +#endif + +VNInfo *SplitEditor::defValue(unsigned RegIdx, + const VNInfo *ParentVNI, + SlotIndex Idx) { + assert(ParentVNI && "Mapping NULL value"); + assert(Idx.isValid() && "Invalid SlotIndex"); + assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); + LiveInterval *LI = Edit->get(RegIdx); + + // Create a new value. + VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); + + // Use insert for lookup, so we can add missing values with a second lookup. + std::pair<ValueMap::iterator, bool> InsP = + Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), + ValueForcePair(VNI, false))); + + // This was the first time (RegIdx, ParentVNI) was mapped. + // Keep it as a simple def without any liveness. + if (InsP.second) + return VNI; + + // If the previous value was a simple mapping, add liveness for it now. + if (VNInfo *OldVNI = InsP.first->second.getPointer()) { + SlotIndex Def = OldVNI->def; + LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI)); + // No longer a simple mapping. Switch to a complex, non-forced mapping. + InsP.first->second = ValueForcePair(); + } + + // This is a complex mapping, add liveness for VNI + SlotIndex Def = VNI->def; + LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); + + return VNI; +} + +void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { + assert(ParentVNI && "Mapping NULL value"); + ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)]; + VNInfo *VNI = VFP.getPointer(); + + // ParentVNI was either unmapped or already complex mapped. Either way, just + // set the force bit. + if (!VNI) { + VFP.setInt(true); + return; + } + + // This was previously a single mapping. Make sure the old def is represented + // by a trivial live range. 
+ SlotIndex Def = VNI->def; + Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); + // Mark as complex mapped, forced. + VFP = ValueForcePair(0, true); +} + +VNInfo *SplitEditor::defFromParent(unsigned RegIdx, + VNInfo *ParentVNI, + SlotIndex UseIdx, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + MachineInstr *CopyMI = 0; + SlotIndex Def; + LiveInterval *LI = Edit->get(RegIdx); + + // We may be trying to avoid interference that ends at a deleted instruction, + // so always begin RegIdx 0 early and all others late. + bool Late = RegIdx != 0; + + // Attempt cheap-as-a-copy rematerialization. + LiveRangeEdit::Remat RM(ParentVNI); + if (Edit->canRematerializeAt(RM, UseIdx, true)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); + ++NumRemats; + } else { + // Can't remat, just insert a copy from parent. + CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) + .addReg(Edit->getReg()); + Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) + .getRegSlot(); + ++NumCopies; + } + + // Define the value in Reg. + return defValue(RegIdx, ParentVNI, Def); +} + +/// Create a new virtual register and live interval. +unsigned SplitEditor::openIntv() { + // Create the complement as index 0. + if (Edit->empty()) + Edit->create(); + + // Create the open interval. + OpenIdx = Edit->size(); + Edit->create(); + return OpenIdx; +} + +void SplitEditor::selectIntv(unsigned Idx) { + assert(Idx != 0 && "Cannot select the complement interval"); + assert(Idx < Edit->size() && "Can only select previously opened interval"); + DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n'); + OpenIdx = Idx; +} + +SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before enterIntvBefore"); + DEBUG(dbgs() << " enterIntvBefore " << Idx); + Idx = Idx.getBaseIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "enterIntvBefore called with invalid index"); + + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI); + return VNI->def; +} + +SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before enterIntvAfter"); + DEBUG(dbgs() << " enterIntvAfter " << Idx); + Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "enterIntvAfter called with invalid index"); + + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), + llvm::next(MachineBasicBlock::iterator(MI))); + return VNI->def; +} + +SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); + SlotIndex End = LIS.getMBBEndIdx(&MBB); + SlotIndex Last = End.getPrevSlot(); + DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return End; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id); + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, + SA.getLastSplitPointIter(&MBB)); + RegAssign.insert(VNI->def, End, 
OpenIdx); + DEBUG(dump()); + return VNI->def; +} + +/// useIntv - indicate that all instructions in MBB should use OpenLI. +void SplitEditor::useIntv(const MachineBasicBlock &MBB) { + useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB)); +} + +void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) { + assert(OpenIdx && "openIntv not called before useIntv"); + DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):"); + RegAssign.insert(Start, End, OpenIdx); + DEBUG(dump()); +} + +SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before leaveIntvAfter"); + DEBUG(dbgs() << " leaveIntvAfter " << Idx); + + // The interval must be live beyond the instruction at Idx. + SlotIndex Boundary = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Boundary.getNextSlot(); + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Boundary); + assert(MI && "No instruction at index"); + + // In spill mode, make live ranges as short as possible by inserting the copy + // before MI. This is only possible if that instruction doesn't redefine the + // value. The inserted COPY is not a kill, and we don't need to recompute + // the source live range. The spiller also won't try to hoist this copy. + if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) && + MI->readsVirtualRegister(Edit->getReg())) { + forceRecompute(0, ParentVNI); + defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI); + return Idx; + } + + VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(), + llvm::next(MachineBasicBlock::iterator(MI))); + return VNI->def; +} + +SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before leaveIntvBefore"); + DEBUG(dbgs() << " leaveIntvBefore " << Idx); + + // The interval must be live into the instruction at Idx. + Idx = Idx.getBaseIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx.getNextSlot(); + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "No instruction at index"); + VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI); + return VNI->def; +} + +SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before leaveIntvAtTop"); + SlotIndex Start = LIS.getMBBStartIdx(&MBB); + DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start); + + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Start; + } + + VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB, + MBB.SkipPHIsAndLabels(MBB.begin())); + RegAssign.insert(Start, VNI->def, OpenIdx); + DEBUG(dump()); + return VNI->def; +} + +void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { + assert(OpenIdx && "openIntv not called before overlapIntv"); + const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); + assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) && + "Parent changes value in extended range"); + assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && + "Range cannot span basic blocks"); + + // The complement interval will be extended as needed by LRCalc.extend(). 
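+  // Forcing the parent value into complex mapping means the overlapped part
+  // of the complement is recomputed by LiveRangeCalc instead of being
+  // blitted from RegAssign, which records only the open interval here.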
+ if (ParentVNI) + forceRecompute(0, ParentVNI); + DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); + RegAssign.insert(Start, End, OpenIdx); + DEBUG(dump()); +} + +//===----------------------------------------------------------------------===// +// Spill modes +//===----------------------------------------------------------------------===// + +void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { + LiveInterval *LI = Edit->get(0); + DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n"); + RegAssignMap::iterator AssignI; + AssignI.setMap(RegAssign); + + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + VNInfo *VNI = Copies[i]; + SlotIndex Def = VNI->def; + MachineInstr *MI = LIS.getInstructionFromIndex(Def); + assert(MI && "No instruction for back-copy"); + + MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::iterator MBBI(MI); + bool AtBegin; + do AtBegin = MBBI == MBB->begin(); + while (!AtBegin && (--MBBI)->isDebugValue()); + + DEBUG(dbgs() << "Removing " << Def << '\t' << *MI); + LI->removeValNo(VNI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + + // Adjust RegAssign if a register assignment is killed at VNI->def. We + // want to avoid calculating the live range of the source register if + // possible. + AssignI.find(Def.getPrevSlot()); + if (!AssignI.valid() || AssignI.start() >= Def) + continue; + // If MI doesn't kill the assigned register, just leave it. + if (AssignI.stop() != Def) + continue; + unsigned RegIdx = AssignI.value(); + if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) { + DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); + forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def)); + } else { + SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot(); + DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); + AssignI.setStop(Kill); + } + } +} + +MachineBasicBlock* +SplitEditor::findShallowDominator(MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB) { + if (MBB == DefMBB) + return MBB; + assert(MDT.dominates(DefMBB, MBB) && "MBB must be dominated by the def."); + + const MachineLoopInfo &Loops = SA.Loops; + const MachineLoop *DefLoop = Loops.getLoopFor(DefMBB); + MachineDomTreeNode *DefDomNode = MDT[DefMBB]; + + // Best candidate so far. + MachineBasicBlock *BestMBB = MBB; + unsigned BestDepth = UINT_MAX; + + for (;;) { + const MachineLoop *Loop = Loops.getLoopFor(MBB); + + // MBB isn't in a loop, it doesn't get any better. All dominators have a + // higher frequency by definition. + if (!Loop) { + DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" + << MBB->getNumber() << " at depth 0\n"); + return MBB; + } + + // We'll never be able to exit the DefLoop. + if (Loop == DefLoop) { + DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" + << MBB->getNumber() << " in the same loop\n"); + return MBB; + } + + // Least busy dominator seen so far. + unsigned Depth = Loop->getLoopDepth(); + if (Depth < BestDepth) { + BestMBB = MBB; + BestDepth = Depth; + DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" + << MBB->getNumber() << " at depth " << Depth << '\n'); + } + + // Leave loop by going to the immediate dominator of the loop header. + // This is a bigger stride than simply walking up the dominator tree. + MachineDomTreeNode *IDom = MDT[Loop->getHeader()]->getIDom(); + + // Too far up the dominator tree? 
+ if (!IDom || !MDT.dominates(DefDomNode, IDom)) + return BestMBB; + + MBB = IDom->getBlock(); + } +} + +void SplitEditor::hoistCopiesForSize() { + // Get the complement interval, always RegIdx 0. + LiveInterval *LI = Edit->get(0); + LiveInterval *Parent = &Edit->getParent(); + + // Track the nearest common dominator for all back-copies for each ParentVNI, + // indexed by ParentVNI->id. + typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair; + SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums()); + + // Find the nearest common dominator for parent values with multiple + // back-copies. If a single back-copy dominates, put it in DomPair.second. + for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); + VI != VE; ++VI) { + VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); + assert(ParentVNI && "Parent not live at complement def"); + + // Don't hoist remats. The complement is probably going to disappear + // completely anyway. + if (Edit->didRematerialize(ParentVNI)) + continue; + + MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def); + DomPair &Dom = NearestDom[ParentVNI->id]; + + // Keep directly defined parent values. This is either a PHI or an + // instruction in the complement range. All other copies of ParentVNI + // should be eliminated. + if (VNI->def == ParentVNI->def) { + DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n'); + Dom = DomPair(ValMBB, VNI->def); + continue; + } + // Skip the singly mapped values. There is nothing to gain from hoisting a + // single back-copy. + if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) { + DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n'); + continue; + } + + if (!Dom.first) { + // First time we see ParentVNI. VNI dominates itself. + Dom = DomPair(ValMBB, VNI->def); + } else if (Dom.first == ValMBB) { + // Two defs in the same block. Pick the earlier def. + if (!Dom.second.isValid() || VNI->def < Dom.second) + Dom.second = VNI->def; + } else { + // Different basic blocks. Check if one dominates. + MachineBasicBlock *Near = + MDT.findNearestCommonDominator(Dom.first, ValMBB); + if (Near == ValMBB) + // Def ValMBB dominates. + Dom = DomPair(ValMBB, VNI->def); + else if (Near != Dom.first) + // None dominate. Hoist to common dominator, need new def. + Dom = DomPair(Near, SlotIndex()); + } + + DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def + << " for parent " << ParentVNI->id << '@' << ParentVNI->def + << " hoist to BB#" << Dom.first->getNumber() << ' ' + << Dom.second << '\n'); + } + + // Insert the hoisted copies. + for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) { + DomPair &Dom = NearestDom[i]; + if (!Dom.first || Dom.second.isValid()) + continue; + // This value needs a hoisted copy inserted at the end of Dom.first. + VNInfo *ParentVNI = Parent->getValNumInfo(i); + MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def); + // Get a less loopy dominator than Dom.first. + Dom.first = findShallowDominator(Dom.first, DefMBB); + SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot(); + Dom.second = + defFromParent(0, ParentVNI, Last, *Dom.first, + SA.getLastSplitPointIter(Dom.first))->def; + } + + // Remove redundant back-copies that are now known to be dominated by another + // def with the same value. 
+ SmallVector<VNInfo*, 8> BackCopies; + for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); + VI != VE; ++VI) { + VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); + const DomPair &Dom = NearestDom[ParentVNI->id]; + if (!Dom.first || Dom.second == VNI->def) + continue; + BackCopies.push_back(VNI); + forceRecompute(0, ParentVNI); + } + removeBackCopies(BackCopies); +} + + +/// transferValues - Transfer all possible values to the new live ranges. +/// Values that were rematerialized are left alone, they need LRCalc.extend(). +bool SplitEditor::transferValues() { + bool Skipped = false; + RegAssignMap::const_iterator AssignI = RegAssign.begin(); + for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(), + ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) { + DEBUG(dbgs() << " blit " << *ParentI << ':'); + VNInfo *ParentVNI = ParentI->valno; + // RegAssign has holes where RegIdx 0 should be used. + SlotIndex Start = ParentI->start; + AssignI.advanceTo(Start); + do { + unsigned RegIdx; + SlotIndex End = ParentI->end; + if (!AssignI.valid()) { + RegIdx = 0; + } else if (AssignI.start() <= Start) { + RegIdx = AssignI.value(); + if (AssignI.stop() < End) { + End = AssignI.stop(); + ++AssignI; + } + } else { + RegIdx = 0; + End = std::min(End, AssignI.start()); + } + + // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. + DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); + LiveInterval *LI = Edit->get(RegIdx); + + // Check for a simply defined value that can be blitted directly. + ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id)); + if (VNInfo *VNI = VFP.getPointer()) { + DEBUG(dbgs() << ':' << VNI->id); + LI->addRange(LiveRange(Start, End, VNI)); + Start = End; + continue; + } + + // Skip values with forced recomputation. + if (VFP.getInt()) { + DEBUG(dbgs() << "(recalc)"); + Skipped = true; + Start = End; + continue; + } + + LiveRangeCalc &LRC = getLRCalc(RegIdx); + + // This value has multiple defs in RegIdx, but it wasn't rematerialized, + // so the live range is accurate. Add live-in blocks in [Start;End) to the + // LiveInBlocks. + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + SlotIndex BlockStart, BlockEnd; + tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + + // The first block may be live-in, or it may have its own def. + if (Start != BlockStart) { + VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End)); + assert(VNI && "Missing def for complex mapped value"); + DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); + // MBB has its own def. Is it also live-out? + if (BlockEnd <= End) + LRC.setLiveOutValue(MBB, VNI); + + // Skip to the next block for live-in. + ++MBB; + BlockStart = BlockEnd; + } + + // Handle the live-in blocks covered by [Start;End). + assert(Start <= BlockStart && "Expected live-in block"); + while (BlockStart < End) { + DEBUG(dbgs() << ">BB#" << MBB->getNumber()); + BlockEnd = LIS.getMBBEndIdx(MBB); + if (BlockStart == ParentVNI->def) { + // This block has the def of a parent PHI, so it isn't live-in. + assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); + VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End)); + assert(VNI && "Missing def for complex mapped parent PHI"); + if (End >= BlockEnd) + LRC.setLiveOutValue(MBB, VNI); // Live-out as well. + } else { + // This block needs a live-in value. 
The last block covered may not
+        // be live-out.
+        if (End < BlockEnd)
+          LRC.addLiveInBlock(LI, MDT[MBB], End);
+        else {
+          // Live-through, and we don't know the value.
+          LRC.addLiveInBlock(LI, MDT[MBB]);
+          LRC.setLiveOutValue(MBB, 0);
+        }
+      }
+      BlockStart = BlockEnd;
+      ++MBB;
+    }
+    Start = End;
+  } while (Start != ParentI->end);
+  DEBUG(dbgs() << '\n');
+  }
+
+  LRCalc[0].calculateValues();
+  if (SpillMode)
+    LRCalc[1].calculateValues();
+
+  return Skipped;
+}
+
+void SplitEditor::extendPHIKillRanges() {
+  // Extend live ranges to be live-out for successor PHI values.
+  for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+       E = Edit->getParent().vni_end(); I != E; ++I) {
+    const VNInfo *PHIVNI = *I;
+    if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+      continue;
+    unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+    LiveInterval *LI = Edit->get(RegIdx);
+    LiveRangeCalc &LRC = getLRCalc(RegIdx);
+    MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+      SlotIndex End = LIS.getMBBEndIdx(*PI);
+      SlotIndex LastUse = End.getPrevSlot();
+      // The predecessor may not have a live-out value. That is OK, like an
+      // undef PHI operand.
+      if (Edit->getParent().liveAt(LastUse)) {
+        assert(RegAssign.lookup(LastUse) == RegIdx &&
+               "Different register assignment in phi predecessor");
+        LRC.extend(LI, End);
+      }
+    }
+  }
+}
+
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
+       RE = MRI.reg_end(); RI != RE;) {
+    MachineOperand &MO = RI.getOperand();
+    MachineInstr *MI = MO.getParent();
+    ++RI;
+    // LiveDebugVariables should have handled all DBG_VALUE instructions.
+    if (MI->isDebugValue()) {
+      DEBUG(dbgs() << "Zapping " << *MI);
+      MO.setReg(0);
+      continue;
+    }
+
+    // <undef> operands don't really read the register, so it doesn't matter
+    // which register we choose. When the use operand is tied to a def, we must
+    // use the same register as the def, so just do that always.
+    SlotIndex Idx = LIS.getInstructionIndex(MI);
+    if (MO.isDef() || MO.isUndef())
+      Idx = Idx.getRegSlot(MO.isEarlyClobber());
+
+    // Rewrite to the mapped register at Idx.
+    unsigned RegIdx = RegAssign.lookup(Idx);
+    LiveInterval *LI = Edit->get(RegIdx);
+    MO.setReg(LI->reg);
+    DEBUG(dbgs() << "  rewr BB#" << MI->getParent()->getNumber() << '\t'
+                 << Idx << ':' << RegIdx << '\t' << *MI);
+
+    // Extend liveness to Idx if the instruction reads reg.
+    if (!ExtendRanges || MO.isUndef())
+      continue;
+
+    // Skip instructions that don't read Reg.
+    if (MO.isDef()) {
+      if (!MO.getSubReg() && !MO.isEarlyClobber())
+        continue;
+      // We may want to extend a live range for a partial redef, or for a use
+      // tied to an early clobber.
+      Idx = Idx.getPrevSlot();
+      if (!Edit->getParent().liveAt(Idx))
+        continue;
+    } else
+      Idx = Idx.getRegSlot(true);
+
+    getLRCalc(RegIdx).extend(LI, Idx.getNextSlot());
+  }
+}
+
+void SplitEditor::deleteRematVictims() {
+  SmallVector<MachineInstr*, 8> Dead;
+  for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) {
+    LiveInterval *LI = *I;
+    for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
+         LII != LIE; ++LII) {
+      // Dead defs end at the dead slot.
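+      // A value that was rematerialized at every use has shrunk to a single
+      // [def;dead slot) segment, so its original def instruction may now be
+      // deletable.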
+ if (LII->end != LII->valno->def.getDeadSlot()) + continue; + MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); + assert(MI && "Missing instruction for dead def"); + MI->addRegisterDead(LI->reg, &TRI); + + if (!MI->allDefsAreDead()) + continue; + + DEBUG(dbgs() << "All defs dead: " << *MI); + Dead.push_back(MI); + } + } + + if (Dead.empty()) + return; + + Edit->eliminateDeadDefs(Dead); +} + +void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { + ++NumFinished; + + // At this point, the live intervals in Edit contain VNInfos corresponding to + // the inserted copies. + + // Add the original defs from the parent interval. + for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), + E = Edit->getParent().vni_end(); I != E; ++I) { + const VNInfo *ParentVNI = *I; + if (ParentVNI->isUnused()) + continue; + unsigned RegIdx = RegAssign.lookup(ParentVNI->def); + defValue(RegIdx, ParentVNI, ParentVNI->def); + + // Force rematted values to be recomputed everywhere. + // The new live ranges may be truncated. + if (Edit->didRematerialize(ParentVNI)) + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + forceRecompute(i, ParentVNI); + } + + // Hoist back-copies to the complement interval when in spill mode. + switch (SpillMode) { + case SM_Partition: + // Leave all back-copies as is. + break; + case SM_Size: + hoistCopiesForSize(); + break; + case SM_Speed: + llvm_unreachable("Spill mode 'speed' not implemented yet"); + } + + // Transfer the simply mapped values, check if any are skipped. + bool Skipped = transferValues(); + if (Skipped) + extendPHIKillRanges(); + else + ++NumSimple; + + // Rewrite virtual registers, possibly extending ranges. + rewriteAssigned(Skipped); + + // Delete defs that were rematted everywhere. + if (Skipped) + deleteRematVictims(); + + // Get rid of unused values and set phi-kill flags. + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) + (*I)->RenumberValues(LIS); + + // Provide a reverse mapping from original indices to Edit ranges. + if (LRMap) { + LRMap->clear(); + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + LRMap->push_back(i); + } + + // Now check if any registers were separated into multiple components. + ConnectedVNInfoEqClasses ConEQ(LIS); + for (unsigned i = 0, e = Edit->size(); i != e; ++i) { + // Don't use iterators, they are invalidated by create() below. + LiveInterval *li = Edit->get(i); + unsigned NumComp = ConEQ.Classify(li); + if (NumComp <= 1) + continue; + DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); + SmallVector<LiveInterval*, 8> dups; + dups.push_back(li); + for (unsigned j = 1; j != NumComp; ++j) + dups.push_back(&Edit->create()); + ConEQ.Distribute(&dups[0], MRI); + // The new intervals all map back to i. + if (LRMap) + LRMap->resize(Edit->size(), i); + } + + // Calculate spill weight and allocation hints for new intervals. + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops); + + assert(!LRMap || LRMap->size() == Edit->size()); +} + + +//===----------------------------------------------------------------------===// +// Single Block Splitting +//===----------------------------------------------------------------------===// + +bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI, + bool SingleInstrs) const { + // Always split for multiple instructions. + if (!BI.isOneInstr()) + return true; + // Don't split for single instructions unless explicitly requested. 
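+  // (Isolating a single instruction normally buys nothing, but it can enable
+  // register class inflation when the instruction has a restricted register
+  // class; see the SplitAnalysis doc comment in SplitKit.h.)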
+ if (!SingleInstrs) + return false; + // Splitting a live-through range always makes progress. + if (BI.LiveIn && BI.LiveOut) + return true; + // No point in isolating a copy. It has no register class constraints. + if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike()) + return false; + // Finally, don't isolate an end point that was created by earlier splits. + return isOriginalEndpoint(BI.FirstInstr); +} + +void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) { + openIntv(); + SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber()); + SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr, + LastSplitPoint)); + if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) { + useIntv(SegStart, leaveIntvAfter(BI.LastInstr)); + } else { + // The last use is after the last valid split point. + SlotIndex SegStop = leaveIntvBefore(LastSplitPoint); + useIntv(SegStart, SegStop); + overlapIntv(SegStop, BI.LastInstr); + } +} + + +//===----------------------------------------------------------------------===// +// Global Live Range Splitting Support +//===----------------------------------------------------------------------===// + +// These methods support a method of global live range splitting that uses a +// global algorithm to decide intervals for CFG edges. They will insert split +// points and color intervals in basic blocks while avoiding interference. +// +// Note that splitSingleBlock is also useful for blocks where both CFG edges +// are on the stack. + +void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, + unsigned IntvIn, SlotIndex LeaveBefore, + unsigned IntvOut, SlotIndex EnterAfter){ + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); + + DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop + << ") intf " << LeaveBefore << '-' << EnterAfter + << ", live-through " << IntvIn << " -> " << IntvOut); + + assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks"); + + assert((!LeaveBefore || LeaveBefore < Stop) && "Interference after block"); + assert((!IntvIn || !LeaveBefore || LeaveBefore > Start) && "Impossible intf"); + assert((!EnterAfter || EnterAfter >= Start) && "Interference before block"); + + MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); + + if (!IntvOut) { + DEBUG(dbgs() << ", spill on entry.\n"); + // + // <<<<<<<<< Possible LeaveBefore interference. + // |-----------| Live through. + // -____________ Spill on entry. + // + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvAtTop(*MBB); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + (void)Idx; + return; + } + + if (!IntvIn) { + DEBUG(dbgs() << ", reload on exit.\n"); + // + // >>>>>>> Possible EnterAfter interference. + // |-----------| Live through. + // ___________-- Reload on exit. + // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAtEnd(*MBB); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + (void)Idx; + return; + } + + if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) { + DEBUG(dbgs() << ", straight through.\n"); + // + // |-----------| Live through. + // ------------- Straight through, same intv, no interference. + // + selectIntv(IntvOut); + useIntv(Start, Stop); + return; + } + + // We cannot legally insert splits after LSP. 
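+  // The last split point is the first terminator, or earlier when the block
+  // has a landing-pad successor, so every enterIntv* call below must define
+  // the value before LSP.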
+ SlotIndex LSP = SA.getLastSplitPoint(MBBNum); + assert((!IntvOut || !EnterAfter || EnterAfter < LSP) && "Impossible intf"); + + if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter || + LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) { + DEBUG(dbgs() << ", switch avoiding interference.\n"); + // + // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference. + // |-----------| Live through. + // ------======= Switch intervals between interference. + // + selectIntv(IntvOut); + SlotIndex Idx; + if (LeaveBefore && LeaveBefore < LSP) { + Idx = enterIntvBefore(LeaveBefore); + useIntv(Idx, Stop); + } else { + Idx = enterIntvAtEnd(*MBB); + } + selectIntv(IntvIn); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + return; + } + + DEBUG(dbgs() << ", create local intv for interference.\n"); + // + // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference. + // |-----------| Live through. + // ==---------== Switch intervals before/after interference. + // + assert(LeaveBefore <= EnterAfter && "Missed case"); + + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAfter(EnterAfter); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + + selectIntv(IntvIn); + Idx = leaveIntvBefore(LeaveBefore); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); +} + + +void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvIn, SlotIndex LeaveBefore) { + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + << "), uses " << BI.FirstInstr << '-' << BI.LastInstr + << ", reg-in " << IntvIn << ", leave before " << LeaveBefore + << (BI.LiveOut ? ", stack-out" : ", killed in block")); + + assert(IntvIn && "Must have register in"); + assert(BI.LiveIn && "Must be live-in"); + assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference"); + + if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) { + DEBUG(dbgs() << " before interference.\n"); + // + // <<< Interference after kill. + // |---o---x | Killed in block. + // ========= Use IntvIn everywhere. + // + selectIntv(IntvIn); + useIntv(Start, BI.LastInstr); + return; + } + + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + + if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) { + // + // <<< Possible interference after last use. + // |---o---o---| Live-out on stack. + // =========____ Leave IntvIn after last use. + // + // < Interference after last use. + // |---o---o--o| Live-out on stack, late last use. + // ============ Copy to stack after LSP, overlap IntvIn. + // \_____ Stack interval is live-out. + // + if (BI.LastInstr < LSP) { + DEBUG(dbgs() << ", spill after last use before interference.\n"); + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvAfter(BI.LastInstr); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + } else { + DEBUG(dbgs() << ", spill before last split point.\n"); + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvBefore(LSP); + overlapIntv(Idx, BI.LastInstr); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + } + return; + } + + // The interference is overlapping somewhere we wanted to use IntvIn. 
That + // means we need to create a local interval that can be allocated a + // different register. + unsigned LocalIntv = openIntv(); + (void)LocalIntv; + DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n"); + + if (!BI.LiveOut || BI.LastInstr < LSP) { + // + // <<<<<<< Interference overlapping uses. + // |---o---o---| Live-out on stack. + // =====----____ Leave IntvIn before interference, then spill. + // + SlotIndex To = leaveIntvAfter(BI.LastInstr); + SlotIndex From = enterIntvBefore(LeaveBefore); + useIntv(From, To); + selectIntv(IntvIn); + useIntv(Start, From); + assert((!LeaveBefore || From <= LeaveBefore) && "Interference"); + return; + } + + // <<<<<<< Interference overlapping uses. + // |---o---o--o| Live-out on stack, late last use. + // =====------- Copy to stack before LSP, overlap LocalIntv. + // \_____ Stack interval is live-out. + // + SlotIndex To = leaveIntvBefore(LSP); + overlapIntv(To, BI.LastInstr); + SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore)); + useIntv(From, To); + selectIntv(IntvIn); + useIntv(Start, From); + assert((!LeaveBefore || From <= LeaveBefore) && "Interference"); +} + +void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvOut, SlotIndex EnterAfter) { + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + << "), uses " << BI.FirstInstr << '-' << BI.LastInstr + << ", reg-out " << IntvOut << ", enter after " << EnterAfter + << (BI.LiveIn ? ", stack-in" : ", defined in block")); + + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + + assert(IntvOut && "Must have register out"); + assert(BI.LiveOut && "Must be live-out"); + assert((!EnterAfter || EnterAfter < LSP) && "Bad interference"); + + if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) { + DEBUG(dbgs() << " after interference.\n"); + // + // >>>> Interference before def. + // | o---o---| Defined in block. + // ========= Use IntvOut everywhere. + // + selectIntv(IntvOut); + useIntv(BI.FirstInstr, Stop); + return; + } + + if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) { + DEBUG(dbgs() << ", reload after interference.\n"); + // + // >>>> Interference before def. + // |---o---o---| Live-through, stack-in. + // ____========= Enter IntvOut before first use. + // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstInstr)); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + return; + } + + // The interference is overlapping somewhere we wanted to use IntvOut. That + // means we need to create a local interval that can be allocated a + // different register. + DEBUG(dbgs() << ", interference overlaps uses.\n"); + // + // >>>>>>> Interference overlapping uses. + // |---o---o---| Live-through, stack-in. + // ____---====== Create local interval for interference range. 
+ // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAfter(EnterAfter); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + + openIntv(); + SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr)); + useIntv(From, Idx); +} diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h new file mode 100644 index 0000000..4005a3d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -0,0 +1,469 @@ +//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SplitAnalysis class as well as mutator functions for +// live range splitting. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SPLITKIT_H +#define LLVM_CODEGEN_SPLITKIT_H + +#include "LiveRangeCalc.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/SmallPtrSet.h" + +namespace llvm { + +class ConnectedVNInfoEqClasses; +class LiveInterval; +class LiveIntervals; +class LiveRangeEdit; +class MachineInstr; +class MachineLoopInfo; +class MachineRegisterInfo; +class TargetInstrInfo; +class TargetRegisterInfo; +class VirtRegMap; +class VNInfo; +class raw_ostream; + +/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting +/// opportunities. +class SplitAnalysis { +public: + const MachineFunction &MF; + const VirtRegMap &VRM; + const LiveIntervals &LIS; + const MachineLoopInfo &Loops; + const TargetInstrInfo &TII; + + /// Additional information about basic blocks where the current variable is + /// live. Such a block will look like one of these templates: + /// + /// 1. | o---x | Internal to block. Variable is only live in this block. + /// 2. |---x | Live-in, kill. + /// 3. | o---| Def, live-out. + /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks. + /// 5. |---o---o---| Live-through with uses or defs. + /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks. + /// + /// Two BlockInfo entries are created for template 4. One for the live-in + /// segment, and one for the live-out segment. These entries look as if the + /// block were split in the middle where the live range isn't live. + /// + /// Live-through blocks without any uses don't get BlockInfo entries. They + /// are simply listed in ThroughBlocks instead. + /// + struct BlockInfo { + MachineBasicBlock *MBB; + SlotIndex FirstInstr; ///< First instr accessing current reg. + SlotIndex LastInstr; ///< Last instr accessing current reg. + SlotIndex FirstDef; ///< First non-phi valno->def, or SlotIndex(). + bool LiveIn; ///< Current reg is live in. + bool LiveOut; ///< Current reg is live out. + + /// isOneInstr - Returns true when this BlockInfo describes a single + /// instruction. + bool isOneInstr() const { + return SlotIndex::isSameInstr(FirstInstr, LastInstr); + } + }; + +private: + // Current live interval. + const LiveInterval *CurLI; + + // Sorted slot indexes of using instructions. + SmallVector<SlotIndex, 8> UseSlots; + + /// LastSplitPoint - Last legal split point in each basic block in the current + /// function. 
The first entry is the first terminator, the second entry is the
+  /// last valid split point for a variable that is live in to a landing pad
+  /// successor.
+  SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint;
+
+  /// UseBlocks - Blocks where CurLI has uses.
+  SmallVector<BlockInfo, 8> UseBlocks;
+
+  /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where
+  /// the live range has a gap.
+  unsigned NumGapBlocks;
+
+  /// ThroughBlocks - Block numbers where CurLI is live through without uses.
+  BitVector ThroughBlocks;
+
+  /// NumThroughBlocks - Number of live-through blocks.
+  unsigned NumThroughBlocks;
+
+  /// DidRepairRange - analyze was forced to shrinkToUses().
+  bool DidRepairRange;
+
+  SlotIndex computeLastSplitPoint(unsigned Num);
+
+  // Summarize statistics by counting instructions using CurLI.
+  void analyzeUses();
+
+  /// calcLiveBlockInfo - Compute per-block information about CurLI.
+  bool calcLiveBlockInfo();
+
+public:
+  SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+                const MachineLoopInfo &mli);
+
+  /// analyze - set CurLI to the specified interval, and analyze how it may be
+  /// split.
+  void analyze(const LiveInterval *li);
+
+  /// didRepairRange() - Returns true if CurLI was invalid and has been repaired
+  /// by analyze(). This really shouldn't happen, but sometimes the coalescer
+  /// can create live ranges that end in mid-air.
+  bool didRepairRange() const { return DidRepairRange; }
+
+  /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+  /// new interval.
+  void clear();
+
+  /// getParent - Return the last analyzed interval.
+  const LiveInterval &getParent() const { return *CurLI; }
+
+  /// getLastSplitPoint - Return the base index of the last valid split point
+  /// in the basic block numbered Num.
+  SlotIndex getLastSplitPoint(unsigned Num) {
+    // Inline the common simple case.
+    if (LastSplitPoint[Num].first.isValid() &&
+        !LastSplitPoint[Num].second.isValid())
+      return LastSplitPoint[Num].first;
+    return computeLastSplitPoint(Num);
+  }
+
+  /// getLastSplitPointIter - Returns the last split point as an iterator.
+  MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*);
+
+  /// isOriginalEndpoint - Return true if the original live range was killed or
+  /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
+  /// and 'use' for an early-clobber def.
+  /// This can be used to recognize code inserted by earlier live range
+  /// splitting.
+  bool isOriginalEndpoint(SlotIndex Idx) const;
+
+  /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI.
+  /// This includes both use and def operands, at most one entry per
+  /// instruction.
+  ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; }
+
+  /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
+  /// where CurLI has uses.
+  ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
+
+  /// getNumThroughBlocks - Return the number of through blocks.
+  unsigned getNumThroughBlocks() const { return NumThroughBlocks; }
+
+  /// isThroughBlock - Return true if CurLI is live through MBB without uses.
+  bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); }
+
+  /// getThroughBlocks - Return the set of through blocks.
+  const BitVector &getThroughBlocks() const { return ThroughBlocks; }
+
+  /// getNumLiveBlocks - Return the number of blocks where CurLI is live.
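+  /// This is the number of UseBlocks entries, minus the duplicated gap
+  /// entries, plus the live-through blocks; e.g. (hypothetical counts) 5
+  /// entries with 1 gap duplicate and 3 through blocks give 5 - 1 + 3 = 7.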
+ unsigned getNumLiveBlocks() const { + return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks(); + } + + /// countLiveBlocks - Return the number of blocks where li is live. This is + /// guaranteed to return the same number as getNumLiveBlocks() after calling + /// analyze(li). + unsigned countLiveBlocks(const LiveInterval *li) const; + + typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; + + /// shouldSplitSingleBlock - Returns true if it would help to create a local + /// live range for the instructions in BI. There is normally no benefit to + /// creating a live range for a single instruction, but it does enable + /// register class inflation if the instruction has a restricted register + /// class. + /// + /// @param BI The block to be isolated. + /// @param SingleInstrs True when single instructions should be isolated. + bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const; +}; + + +/// SplitEditor - Edit machine code and LiveIntervals for live range +/// splitting. +/// +/// - Create a SplitEditor from a SplitAnalysis. +/// - Start a new live interval with openIntv. +/// - Mark the places where the new interval is entered using enterIntv* +/// - Mark the ranges where the new interval is used with useIntv* +/// - Mark the places where the interval is exited with exitIntv*. +/// - Finish the current interval with closeIntv and repeat from 2. +/// - Rewrite instructions with finish(). +/// +class SplitEditor { + SplitAnalysis &SA; + LiveIntervals &LIS; + VirtRegMap &VRM; + MachineRegisterInfo &MRI; + MachineDominatorTree &MDT; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + +public: + + /// ComplementSpillMode - Select how the complement live range should be + /// created. SplitEditor automatically creates interval 0 to contain + /// anything that isn't added to another interval. This complement interval + /// can get quite complicated, and it can sometimes be an advantage to allow + /// it to overlap the other intervals. If it is going to spill anyway, no + /// registers are wasted by keeping a value in two places at the same time. + enum ComplementSpillMode { + /// SM_Partition(Default) - Try to create the complement interval so it + /// doesn't overlap any other intervals, and the original interval is + /// partitioned. This may require a large number of back copies and extra + /// PHI-defs. Only segments marked with overlapIntv will be overlapping. + SM_Partition, + + /// SM_Size - Overlap intervals to minimize the number of inserted COPY + /// instructions. Copies to the complement interval are hoisted to their + /// common dominator, so only one COPY is required per value in the + /// complement interval. This also means that no extra PHI-defs need to be + /// inserted in the complement interval. + SM_Size, + + /// SM_Speed - Overlap intervals to minimize the expected execution + /// frequency of the inserted copies. This is very similar to SM_Size, but + /// the complement interval may get some extra PHI-defs. + SM_Speed + }; + +private: + + /// Edit - The current parent register and new intervals created. + LiveRangeEdit *Edit; + + /// Index into Edit of the currently open interval. + /// The index 0 is used for the complement, so the first interval started by + /// openIntv will be 1. + unsigned OpenIdx; + + /// The current spill mode, selected by reset(). + ComplementSpillMode SpillMode; + + typedef IntervalMap<SlotIndex, unsigned> RegAssignMap; + + /// Allocator for the interval map. 
This will eventually be shared with
+  /// SlotIndexes and LiveIntervals.
+  RegAssignMap::Allocator Allocator;
+
+  /// RegAssign - Map of the assigned register indexes.
+  /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+  /// Idx.
+  RegAssignMap RegAssign;
+
+  typedef PointerIntPair<VNInfo*, 1> ValueForcePair;
+  typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap;
+
+  /// Values - keep track of the mapping from parent values to values in the new
+  /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+  ///
+  /// 1. No entry - the value is not mapped to Edit.get(RegIdx).
+  /// 2. (Null, false) - the value is mapped to multiple values in
+  ///    Edit.get(RegIdx). Each value is represented by a minimal live range at
+  ///    its def. The full live range can be inferred exactly from the range
+  ///    of RegIdx in RegAssign.
+  /// 3. (Null, true) - as above, but the ranges in RegAssign are too large, and
+  ///    the live range must be recomputed using LiveRangeCalc::extend().
+  /// 4. (VNI, false) - the value is mapped to a single new value.
+  ///    The new value has no live ranges anywhere.
+  ValueMap Values;
+
+  /// LRCalc - Cache for computing live ranges and SSA update. Each instance
+  /// can only handle non-overlapping live ranges, so use a separate
+  /// LiveRangeCalc instance for the complement interval when in spill mode.
+  LiveRangeCalc LRCalc[2];
+
+  /// getLRCalc - Return the LRCalc to use for RegIdx. In spill mode, the
+  /// complement interval can overlap the other intervals, so it gets its own
+  /// LRCalc instance. When not in spill mode, all intervals can share one.
+  LiveRangeCalc &getLRCalc(unsigned RegIdx) {
+    return LRCalc[SpillMode != SM_Partition && RegIdx != 0];
+  }
+
+  /// defValue - define a value in RegIdx from ParentVNI at Idx.
+  /// Idx does not have to be ParentVNI->def, but it must be contained within
+  /// ParentVNI's live range in ParentLI. The new value is added to the value
+  /// map.
+  /// Return the new LI value.
+  VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+
+  /// forceRecompute - Force the live range of ParentVNI in RegIdx to be
+  /// recomputed by LiveRangeCalc::extend regardless of the number of defs.
+  /// This is used for values whose live range doesn't match RegAssign exactly.
+  /// They could have rematerialized, or back-copies may have been moved.
+  void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI);
+
+  /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+  /// rematerialization or a COPY from parent. Return the new value.
+  VNInfo *defFromParent(unsigned RegIdx,
+                        VNInfo *ParentVNI,
+                        SlotIndex UseIdx,
+                        MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator I);
+
+  /// removeBackCopies - Remove the copy instructions that define the values
+  /// in the vector in the complement interval.
+  void removeBackCopies(SmallVectorImpl<VNInfo*> &Copies);
+
+  /// findShallowDominator - Returns the least busy dominator of MBB that is
+  /// also dominated by DefMBB. Busy is measured by loop depth.
+  MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
+                                          MachineBasicBlock *DefMBB);
+
+  /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
+  /// way that minimizes code size. This implements the SM_Size spill mode.
+  void hoistCopiesForSize();
+
+  /// transferValues - Transfer values to the new ranges.
+  /// Return true if any ranges were skipped.
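+  /// Skipped values still need their live ranges computed; finish() follows
+  /// up with extendPHIKillRanges() and range extension in rewriteAssigned()
+  /// before deleting remat victims.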
+  bool transferValues();
+
+  /// extendPHIKillRanges - Extend the ranges of all values killed by original
+  /// parent PHIDefs.
+  void extendPHIKillRanges();
+
+  /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+  void rewriteAssigned(bool ExtendRanges);
+
+  /// deleteRematVictims - Delete defs that are dead after rematerializing.
+  void deleteRematVictims();
+
+public:
+  /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+  /// Newly created intervals are kept in the LiveRangeEdit passed to reset().
+  SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
+              MachineDominatorTree&);
+
+  /// reset - Prepare for a new split.
+  void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
+
+  /// Create a new virtual register and live interval.
+  /// Return the interval index, starting from 1. Interval index 0 is the
+  /// implicit complement interval.
+  unsigned openIntv();
+
+  /// currentIntv - Return the current interval index.
+  unsigned currentIntv() const { return OpenIdx; }
+
+  /// selectIntv - Select a previously opened interval index.
+  void selectIntv(unsigned Idx);
+
+  /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+  /// If the parent interval is not live before Idx, a COPY is not inserted.
+  /// Return the beginning of the new live range.
+  SlotIndex enterIntvBefore(SlotIndex Idx);
+
+  /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+  /// Return the beginning of the new live range.
+  SlotIndex enterIntvAfter(SlotIndex Idx);
+
+  /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+  /// Use the open interval from the inserted copy to the MBB end.
+  /// Return the beginning of the new live range.
+  SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
+
+  /// useIntv - indicate that all instructions in MBB should use OpenLI.
+  void useIntv(const MachineBasicBlock &MBB);
+
+  /// useIntv - indicate that all instructions in range should use OpenLI.
+  void useIntv(SlotIndex Start, SlotIndex End);
+
+  /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+  /// Return the end of the live range.
+  SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+  /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+  /// Return the end of the live range.
+  SlotIndex leaveIntvBefore(SlotIndex Idx);
+
+  /// leaveIntvAtTop - Leave the interval at the top of MBB.
+  /// Add liveness from the MBB top to the copy.
+  /// Return the end of the live range.
+  SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+  /// overlapIntv - Indicate that all instructions in range should use the open
+  /// interval, but also let the complement interval be live.
+  ///
+  /// This doubles the register pressure, but is sometimes required to deal with
+  /// register uses after the last valid split point.
+  ///
+  /// The Start index should be a return value from a leaveIntv* call, and End
+  /// should be in the same basic block. The parent interval must have the same
+  /// value across the range.
+  ///
+  void overlapIntv(SlotIndex Start, SlotIndex End);
+
+  /// finish - after all the new live ranges have been created, compute the
+  /// remaining live range, and rewrite instructions to use the new registers.
+  /// @param LRMap When not null, this vector will map each live range in Edit
+  ///              back to the indices returned by openIntv.
+  ///              There may be extra indices created by dead code elimination.
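+  /// A minimal round trip, mirroring splitSingleBlock (illustrative only;
+  /// LRE and BI stand for a prepared LiveRangeEdit and BlockInfo):
+  ///   SE.reset(LRE);
+  ///   SE.openIntv();
+  ///   SlotIndex From = SE.enterIntvBefore(BI.FirstInstr);
+  ///   SE.useIntv(From, SE.leaveIntvAfter(BI.LastInstr));
+  ///   SE.finish();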
+  void finish(SmallVectorImpl<unsigned> *LRMap = 0);
+
+  /// dump - print the current interval mapping to dbgs().
+  void dump() const;
+
+  // ===--- High level methods ---===
+
+  /// splitSingleBlock - Split CurLI into a separate live interval around the
+  /// uses in a single block. This is intended to be used as part of a larger
+  /// split, and doesn't call finish().
+  void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
+
+  /// splitLiveThroughBlock - Split CurLI in the given block such that it
+  /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+  /// the block, but they will be ignored when placing split points.
+  ///
+  /// @param MBBNum      Block number.
+  /// @param IntvIn      Interval index entering the block.
+  /// @param LeaveBefore When set, leave IntvIn before this point.
+  /// @param IntvOut     Interval index leaving the block.
+  /// @param EnterAfter  When set, enter IntvOut after this point.
+  void splitLiveThroughBlock(unsigned MBBNum,
+                             unsigned IntvIn, SlotIndex LeaveBefore,
+                             unsigned IntvOut, SlotIndex EnterAfter);
+
+  /// splitRegInBlock - Split CurLI in the given block such that it enters the
+  /// block in IntvIn and leaves it on the stack (or not at all). Split points
+  /// are placed in a way that avoids putting uses in the stack interval. This
+  /// may require creating a local interval when there is interference.
+  ///
+  /// @param BI          Block descriptor.
+  /// @param IntvIn      Interval index entering the block. Not 0.
+  /// @param LeaveBefore When set, leave IntvIn before this point.
+  void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+                       unsigned IntvIn, SlotIndex LeaveBefore);
+
+  /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+  /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+  /// Split points are placed to avoid interference and such that the uses are
+  /// not in the stack interval. This may require creating a local interval
+  /// when there is interference.
+  ///
+  /// @param BI          Block descriptor.
+  /// @param IntvOut     Interval index leaving the block.
+  /// @param EnterAfter  When set, enter IntvOut after this point.
+  void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+                        unsigned IntvOut, SlotIndex EnterAfter);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
new file mode 100644
index 0000000..a789a25
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -0,0 +1,802 @@
+//===-- StackColoring.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the stack-coloring optimization that looks for
+// lifetime marker machine instructions (LIFETIME_START and LIFETIME_END),
+// which represent the possible lifetime of stack slots. It attempts to
+// merge disjoint stack slots and reduce the used stack space.
+// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
+//
+// TODO: In the future we plan to improve stack coloring in the following ways:
+// 1. Allow merging multiple small slots into a single larger slot at different
+//    offsets.
+// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
+//    spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableColoring("no-stack-coloring",
+        cl::init(false), cl::Hidden,
+        cl::desc("Disable stack coloring"));
+
+/// The user may write code that uses allocas outside of the declared lifetime
+/// zone. This can happen when the user returns a reference to a local
+/// data-structure. We can detect these cases and decide not to optimize the
+/// code. If this flag is enabled, we try to save the user.
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+        cl::init(false), cl::Hidden,
+        cl::desc("Do not optimize lifetime zones that "
+                 "are broken"));
+
+STATISTIC(NumMarkerSeen,  "Number of lifetime markers found.");
+STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
+STATISTIC(StackSlotMerged, "Number of stack slots merged.");
+STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region.");
+
+//===----------------------------------------------------------------------===//
+//                           StackColoring Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// StackColoring - A machine pass for merging disjoint stack allocations,
+/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+class StackColoring : public MachineFunctionPass {
+  MachineFrameInfo *MFI;
+  MachineFunction *MF;
+
+  /// A class representing liveness information for a single basic block.
+  /// Each bit in the BitVector represents the liveness property
+  /// for a different stack slot.
+  struct BlockLifetimeInfo {
+    /// Which slots BEGIN in each basic block.
+    BitVector Begin;
+    /// Which slots END in each basic block.
+    BitVector End;
+    /// Which slots are marked as LIVE_IN, coming into each basic block.
+    BitVector LiveIn;
+    /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+    BitVector LiveOut;
+  };
+
+  /// Maps active slots (per bit) for each basic block.
+  typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
+  LivenessMap BlockLiveness;
+
+  /// Maps basic blocks to serial numbers.
+  DenseMap<const MachineBasicBlock*, int> BasicBlocks;
+  /// Maps serial numbers to basic blocks.
+  SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
+
+  /// The liveness intervals computed for each stack slot.
+  SmallVector<LiveInterval*, 16> Intervals;
+  /// VNInfo is used for the construction of LiveIntervals.
+  VNInfo::Allocator VNInfoAllocator;
+  /// SlotIndex analysis object.
+  SlotIndexes *Indexes;
+
+  /// The list of lifetime markers found. These markers are to be removed
+  /// once the coloring is done.
+  SmallVector<MachineInstr*, 8> Markers;
+
+  /// SlotSizeSorter - A sort utility for arranging stack slots according
+  /// to their size.
+  struct SlotSizeSorter {
+    MachineFrameInfo *MFI;
+    SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { }
+    bool operator()(int LHS, int RHS) {
+      // We use -1 to denote an uninteresting slot. Place these slots at the
+      // end.
+      if (LHS == -1) return false;
+      if (RHS == -1) return true;
+      // Sort according to size.
+      return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+    }
+  };
+
+public:
+  static char ID;
+  StackColoring() : MachineFunctionPass(ID) {
+    initializeStackColoringPass(*PassRegistry::getPassRegistry());
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+  bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+  /// Debug.
+  void dump() const;
+
+  /// Removes all of the lifetime marker instructions from the function.
+  /// \returns true if any markers were removed.
+  bool removeAllMarkers();
+
+  /// Scan the machine function and find all of the lifetime markers.
+  /// Record the findings in the BEGIN and END vectors.
+  /// \returns the number of markers found.
+  unsigned collectMarkers(unsigned NumSlot);
+
+  /// Perform the dataflow calculation and calculate the lifetime for each of
+  /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
+  /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
+  /// into and out of blocks.
+  void calculateLocalLiveness();
+
+  /// Construct the LiveIntervals for the slots.
+  void calculateLiveIntervals(unsigned NumSlots);
+
+  /// Go over the machine function and change instructions which use stack
+  /// slots to use the joint slots.
+  void remapInstructions(DenseMap<int, int> &SlotRemap);
+
+  /// The input program may contain instructions which are not inside lifetime
+  /// markers. This can happen due to a bug in the compiler or due to a bug in
+  /// user code (for example, returning a reference to a local variable).
+  /// This procedure checks all of the instructions in the function and
+  /// invalidates lifetime ranges which do not contain all of the instructions
+  /// which access that frame slot.
+  void removeInvalidSlotRanges();
+
+  /// Map entries which point to other entries to their destination.
+  /// A->B->C becomes A->C.
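+  /// For example (editor's illustration): the map {1->2, 2->5} is rewritten
+  /// to {1->5, 2->5}, so every remaining lookup resolves in a single step.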
+  void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+};
+} // end anonymous namespace
+
+char StackColoring::ID = 0;
+char &llvm::StackColoringID = StackColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackColoring,
+                   "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(StackColoring,
+                   "stack-coloring", "Merge disjoint stack slots", false, false)
+
+void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<SlotIndexes>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void StackColoring::dump() const {
+  for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+       FI != FE; ++FI) {
+    DEBUG(dbgs()<<"Inspecting block #"<<BasicBlocks.lookup(*FI)<<
+          " ["<<FI->getName()<<"]\n");
+
+    LivenessMap::const_iterator BI = BlockLiveness.find(*FI);
+    assert(BI != BlockLiveness.end() && "Block not found");
+    const BlockLifetimeInfo &BlockInfo = BI->second;
+
+    DEBUG(dbgs()<<"BEGIN : {");
+    for (unsigned i=0; i < BlockInfo.Begin.size(); ++i)
+      DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" ");
+    DEBUG(dbgs()<<"}\n");
+
+    DEBUG(dbgs()<<"END : {");
+    for (unsigned i=0; i < BlockInfo.End.size(); ++i)
+      DEBUG(dbgs()<<BlockInfo.End.test(i)<<" ");
+
+    DEBUG(dbgs()<<"}\n");
+
+    DEBUG(dbgs()<<"LIVE_IN: {");
+    for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i)
+      DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" ");
+
+    DEBUG(dbgs()<<"}\n");
+    DEBUG(dbgs()<<"LIVEOUT: {");
+    for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i)
+      DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" ");
+    DEBUG(dbgs()<<"}\n");
+  }
+}
+
+unsigned StackColoring::collectMarkers(unsigned NumSlot) {
+  unsigned MarkersFound = 0;
+  // Scan the function to find all lifetime markers.
+  // NOTE: We use a reverse-post-order iteration to ensure that we obtain a
+  // deterministic numbering, and because we'll need a post-order iteration
+  // later for solving the liveness dataflow problem.
+  for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+       FI != FE; ++FI) {
+
+    // Assign a serial number to this basic block.
+    BasicBlocks[*FI] = BasicBlockNumbering.size();
+    BasicBlockNumbering.push_back(*FI);
+
+    // Keep a reference to avoid repeated lookups.
+    BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI];
+
+    BlockInfo.Begin.resize(NumSlot);
+    BlockInfo.End.resize(NumSlot);
+
+    for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
+         BI != BE; ++BI) {
+
+      if (BI->getOpcode() != TargetOpcode::LIFETIME_START &&
+          BI->getOpcode() != TargetOpcode::LIFETIME_END)
+        continue;
+
+      Markers.push_back(BI);
+
+      bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
+      const MachineOperand &MI = BI->getOperand(0);
+      unsigned Slot = MI.getIndex();
+
+      MarkersFound++;
+
+      const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+      if (Allocation) {
+        DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
+              " with allocation: "<< Allocation->getName()<<"\n");
+      }
+
+      if (IsStart) {
+        BlockInfo.Begin.set(Slot);
+      } else {
+        if (BlockInfo.Begin.test(Slot)) {
+          // Allocas that start and end within a single block are handled
+          // specially when computing the LiveIntervals to avoid pessimizing
+          // the liveness propagation.
+          BlockInfo.Begin.reset(Slot);
+        } else {
+          BlockInfo.End.set(Slot);
+        }
+      }
+    }
+  }
+
+  // Update statistics.
+  NumMarkerSeen += MarkersFound;
+  return MarkersFound;
+}
+
+void StackColoring::calculateLocalLiveness() {
+  // Perform a standard reverse dataflow computation to solve for
+  // global liveness. The BEGIN set here is equivalent to KILL in the standard
+  // formulation, and END is equivalent to GEN. The result of this computation
+  // is a map from blocks to bitvectors where the bitvectors represent which
+  // allocas are live in/out of that block.
+  SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
+                                                 BasicBlockNumbering.end());
+  unsigned NumSSMIters = 0;
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    ++NumSSMIters;
+
+    SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;
+
+    for (SmallVector<const MachineBasicBlock*, 8>::iterator
+         PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
+         PI != PE; ++PI) {
+
+      const MachineBasicBlock *BB = *PI;
+      if (!BBSet.count(BB)) continue;
+
+      // Use an iterator to avoid repeated lookups.
+      LivenessMap::iterator BI = BlockLiveness.find(BB);
+      assert(BI != BlockLiveness.end() && "Block not found");
+      BlockLifetimeInfo &BlockInfo = BI->second;
+
+      BitVector LocalLiveIn;
+      BitVector LocalLiveOut;
+
+      // Forward propagation from begins to ends.
+      for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
+           PE = BB->pred_end(); PI != PE; ++PI) {
+        LivenessMap::const_iterator I = BlockLiveness.find(*PI);
+        assert(I != BlockLiveness.end() && "Predecessor not found");
+        LocalLiveIn |= I->second.LiveOut;
+      }
+      LocalLiveIn |= BlockInfo.End;
+      LocalLiveIn.reset(BlockInfo.Begin);
+
+      // Reverse propagation from ends to begins.
+      for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
+           SE = BB->succ_end(); SI != SE; ++SI) {
+        LivenessMap::const_iterator I = BlockLiveness.find(*SI);
+        assert(I != BlockLiveness.end() && "Successor not found");
+        LocalLiveOut |= I->second.LiveIn;
+      }
+      LocalLiveOut |= BlockInfo.Begin;
+      LocalLiveOut.reset(BlockInfo.End);
+
+      LocalLiveIn |= LocalLiveOut;
+      LocalLiveOut |= LocalLiveIn;
+
+      // After adopting the live bits, we need to turn off the bits which
+      // are de-activated in this block.
+      LocalLiveOut.reset(BlockInfo.End);
+      LocalLiveIn.reset(BlockInfo.Begin);
+
+      // If we have both BEGIN and END markers in the same basic block then
+      // we know that the BEGIN marker comes after the END, because we already
+      // handle the case where the BEGIN comes before the END when collecting
+      // the markers (and building the BEGIN/END vectors).
+      // We want to enable the LIVE_IN and LIVE_OUT bits of slots that have
+      // both BEGIN and END because it means that the value lives before and
+      // after this basic block.
+      BitVector LocalEndBegin = BlockInfo.End;
+      LocalEndBegin &= BlockInfo.Begin;
+      LocalLiveIn |= LocalEndBegin;
+      LocalLiveOut |= LocalEndBegin;
+
+      if (LocalLiveIn.test(BlockInfo.LiveIn)) {
+        changed = true;
+        BlockInfo.LiveIn |= LocalLiveIn;
+
+        for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
+             PE = BB->pred_end(); PI != PE; ++PI)
+          NextBBSet.insert(*PI);
+      }
+
+      if (LocalLiveOut.test(BlockInfo.LiveOut)) {
+        changed = true;
+        BlockInfo.LiveOut |= LocalLiveOut;
+
+        for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
+             SE = BB->succ_end(); SI != SE; ++SI)
+          NextBBSet.insert(*SI);
+      }
+    }
+
+    BBSet = NextBBSet;
+  } // while changed.
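+
+  // Editor's note: a worked example of the fixed point above, on a
+  // hypothetical three-block chain A -> B -> C where slot 0 has its
+  // LIFETIME_START in A and its LIFETIME_END in C:
+  //   Begin[A] = {0}, End[C] = {0}, all other sets empty.
+  // The iteration converges to
+  //   LiveOut[A] = LiveIn[B] = LiveOut[B] = LiveIn[C] = {0},
+  // while LiveIn[A] and LiveOut[C] stay empty: the slot is live exactly from
+  // its BEGIN marker to its END marker.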
+} + +void StackColoring::calculateLiveIntervals(unsigned NumSlots) { + SmallVector<SlotIndex, 16> Starts; + SmallVector<SlotIndex, 16> Finishes; + + // For each block, find which slots are active within this block + // and update the live intervals. + for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end(); + MBB != MBBe; ++MBB) { + Starts.clear(); + Starts.resize(NumSlots); + Finishes.clear(); + Finishes.resize(NumSlots); + + // Create the interval for the basic blocks with lifetime markers in them. + for (SmallVectorImpl<MachineInstr*>::const_iterator it = Markers.begin(), + e = Markers.end(); it != e; ++it) { + const MachineInstr *MI = *it; + if (MI->getParent() != MBB) + continue; + + assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || + MI->getOpcode() == TargetOpcode::LIFETIME_END) && + "Invalid Lifetime marker"); + + bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; + const MachineOperand &Mo = MI->getOperand(0); + int Slot = Mo.getIndex(); + assert(Slot >= 0 && "Invalid slot"); + + SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); + + if (IsStart) { + if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + Starts[Slot] = ThisIndex; + } else { + if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) + Finishes[Slot] = ThisIndex; + } + } + + // Create the interval of the blocks that we previously found to be 'alive'. + BitVector Alive = BlockLiveness[MBB].LiveIn; + Alive |= BlockLiveness[MBB].LiveOut; + + if (Alive.any()) { + for (int pos = Alive.find_first(); pos != -1; + pos = Alive.find_next(pos)) { + if (!Starts[pos].isValid()) + Starts[pos] = Indexes->getMBBStartIdx(MBB); + if (!Finishes[pos].isValid()) + Finishes[pos] = Indexes->getMBBEndIdx(MBB); + } + } + + for (unsigned i = 0; i < NumSlots; ++i) { + assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range"); + if (!Starts[i].isValid()) + continue; + + assert(Starts[i] && Finishes[i] && "Invalid interval"); + VNInfo *ValNum = Intervals[i]->getValNumInfo(0); + SlotIndex S = Starts[i]; + SlotIndex F = Finishes[i]; + if (S < F) { + // We have a single consecutive region. + Intervals[i]->addRange(LiveRange(S, F, ValNum)); + } else { + // We have two non consecutive regions. This happens when + // LIFETIME_START appears after the LIFETIME_END marker. + SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); + SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); + Intervals[i]->addRange(LiveRange(NewStart, F, ValNum)); + Intervals[i]->addRange(LiveRange(S, NewFin, ValNum)); + } + } + } +} + +bool StackColoring::removeAllMarkers() { + unsigned Count = 0; + for (unsigned i = 0; i < Markers.size(); ++i) { + Markers[i]->eraseFromParent(); + Count++; + } + Markers.clear(); + + DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n"); + return Count; +} + +void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { + unsigned FixedInstr = 0; + unsigned FixedMemOp = 0; + unsigned FixedDbg = 0; + MachineModuleInfo *MMI = &MF->getMMI(); + + // Remap debug information that refers to stack slots. 
+  MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+  for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+       VE = VMap.end(); VI != VE; ++VI) {
+    const MDNode *Var = VI->first;
+    if (!Var) continue;
+    std::pair<unsigned, DebugLoc> &VP = VI->second;
+    if (SlotRemap.count(VP.first)) {
+      DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n");
+      VP.first = SlotRemap[VP.first];
+      FixedDbg++;
+    }
+  }
+
+  // Keep a list of *allocas* which need to be remapped.
+  DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
+  for (DenseMap<int, int>::const_iterator it = SlotRemap.begin(),
+       e = SlotRemap.end(); it != e; ++it) {
+    const AllocaInst *From = MFI->getObjectAllocation(it->first);
+    const AllocaInst *To = MFI->getObjectAllocation(it->second);
+    assert(To && From && "Invalid allocation object");
+    Allocas[From] = To;
+  }
+
+  // Remap all instructions to the new stack slots.
+  MachineFunction::iterator BB, BBE;
+  MachineBasicBlock::iterator I, IE;
+  for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+    for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+      // Skip lifetime markers. We'll remove them soon.
+      if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+          I->getOpcode() == TargetOpcode::LIFETIME_END)
+        continue;
+
+      // Update the MachineMemOperand to use the new alloca.
+      for (MachineInstr::mmo_iterator MM = I->memoperands_begin(),
+           E = I->memoperands_end(); MM != E; ++MM) {
+        MachineMemOperand *MMO = *MM;
+
+        const Value *V = MMO->getValue();
+
+        if (!V)
+          continue;
+
+        // Climb up and find the original alloca.
+        V = GetUnderlyingObject(V);
+        // If we did not find one, or if the one that we found is not in our
+        // map, then move on.
+        if (!V || !isa<AllocaInst>(V)) {
+          // Clear mem operand since we don't know for sure that it doesn't
+          // alias a merged alloca.
+          MMO->setValue(0);
+          continue;
+        }
+        const AllocaInst *AI = cast<AllocaInst>(V);
+        if (!Allocas.count(AI))
+          continue;
+
+        MMO->setValue(Allocas[AI]);
+        FixedMemOp++;
+      }
+
+      // Update all of the machine instruction operands.
+      for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+        MachineOperand &MO = I->getOperand(i);
+
+        if (!MO.isFI())
+          continue;
+        int FromSlot = MO.getIndex();
+
+        // Don't touch arguments.
+        if (FromSlot < 0)
+          continue;
+
+        // Only look at mapped slots.
+        if (!SlotRemap.count(FromSlot))
+          continue;
+
+        // In a debug build, check that the instruction that we are modifying
+        // is inside the expected live range. If the instruction is not inside
+        // the calculated range then it means that the alloca usage moved
+        // outside of the lifetime markers, or that the user has a bug.
+        // NOTE: Alloca address calculations which happen outside the lifetime
+        // zone are okay, despite the fact that we don't have a good way
+        // for validating all of the usages of the calculation.
+#ifndef NDEBUG
+        bool TouchesMemory = I->mayLoad() || I->mayStore();
+        // If we *don't* protect the user from escaped allocas, don't bother
+        // validating the instructions.
+        if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
+          SlotIndex Index = Indexes->getInstructionIndex(I);
+          LiveInterval *Interval = Intervals[FromSlot];
+          assert(Interval->find(Index) != Interval->end() &&
+                 "Found instruction usage outside of live range.");
+        }
+#endif
+
+        // Fix the machine instructions.
+        int ToSlot = SlotRemap[FromSlot];
+        MO.setIndex(ToSlot);
+        FixedInstr++;
+      }
+    }
+
+  DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
+  DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
+  DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
+}
+
+void StackColoring::removeInvalidSlotRanges() {
+  MachineFunction::const_iterator BB, BBE;
+  MachineBasicBlock::const_iterator I, IE;
+  for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+    for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+      if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+          I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue())
+        continue;
+
+      // Some intervals are suspicious! In some cases we find address
+      // calculations outside of the lifetime zone, but not actual memory
+      // reads or writes. Memory accesses outside of the lifetime zone are a
+      // clear violation, but address calculations are okay. This can happen
+      // when GEPs are hoisted outside of the lifetime zone.
+      // So, in here we only check instructions which can read or write memory.
+      if (!I->mayLoad() && !I->mayStore())
+        continue;
+
+      // Check all of the machine operands.
+      for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+        const MachineOperand &MO = I->getOperand(i);
+
+        if (!MO.isFI())
+          continue;
+
+        int Slot = MO.getIndex();
+
+        if (Slot < 0)
+          continue;
+
+        if (Intervals[Slot]->empty())
+          continue;
+
+        // Check that the used slot is inside the calculated lifetime range.
+        // If it is not, warn about it and invalidate the range.
+        LiveInterval *Interval = Intervals[Slot];
+        SlotIndex Index = Indexes->getInstructionIndex(I);
+        if (Interval->find(Index) == Interval->end()) {
+          Intervals[Slot]->clear();
+          DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
+          EscapedAllocas++;
+        }
+      }
+    }
+}
+
+void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
+                                   unsigned NumSlots) {
+  // Expunge the slot remap map.
+  for (unsigned i=0; i < NumSlots; ++i) {
+    // If we are remapping slot i.
+    if (SlotRemap.count(i)) {
+      int Target = SlotRemap[i];
+      // As long as our target is mapped to something else, follow it.
+      while (SlotRemap.count(Target)) {
+        Target = SlotRemap[Target];
+        SlotRemap[i] = Target;
+      }
+    }
+  }
+}
+
+bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
+  DEBUG(dbgs() << "********** Stack Coloring **********\n"
+               << "********** Function: "
+               << ((const Value*)Func.getFunction())->getName() << '\n');
+  MF = &Func;
+  MFI = MF->getFrameInfo();
+  Indexes = &getAnalysis<SlotIndexes>();
+  BlockLiveness.clear();
+  BasicBlocks.clear();
+  BasicBlockNumbering.clear();
+  Markers.clear();
+  Intervals.clear();
+  VNInfoAllocator.Reset();
+
+  unsigned NumSlots = MFI->getObjectIndexEnd();
+
+  // If there are no stack slots then there are no markers to remove.
+  if (!NumSlots)
+    return false;
+
+  SmallVector<int, 8> SortedSlots;
+
+  SortedSlots.reserve(NumSlots);
+  Intervals.reserve(NumSlots);
+
+  unsigned NumMarkers = collectMarkers(NumSlots);
+
+  unsigned TotalSize = 0;
+  DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
+  DEBUG(dbgs()<<"Slot structure:\n");
+
+  for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
+    DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
+    TotalSize += MFI->getObjectSize(i);
+  }
+
+  DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
+
+  // Don't continue because there are not enough lifetime markers, or the
+  // stack is too small, or we are told not to optimize the slots.
+  if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
+    DEBUG(dbgs()<<"Will not try to merge slots.\n");
+    return removeAllMarkers();
+  }
+
+  for (unsigned i=0; i < NumSlots; ++i) {
+    LiveInterval *LI = new LiveInterval(i, 0);
+    Intervals.push_back(LI);
+    LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
+    SortedSlots.push_back(i);
+  }
+
+  // Calculate the liveness of each block.
+  calculateLocalLiveness();
+
+  // Propagate the liveness information.
+  calculateLiveIntervals(NumSlots);
+
+  // Search for allocas which are used outside of the declared lifetime
+  // markers.
+  if (ProtectFromEscapedAllocas)
+    removeInvalidSlotRanges();
+
+  // Maps old slots to new slots.
+  DenseMap<int, int> SlotRemap;
+  unsigned RemovedSlots = 0;
+  unsigned ReducedSize = 0;
+
+  // Do not bother looking at empty intervals.
+  for (unsigned I = 0; I < NumSlots; ++I) {
+    if (Intervals[SortedSlots[I]]->empty())
+      SortedSlots[I] = -1;
+  }
+
+  // This is a simple greedy algorithm for merging allocas. First, sort the
+  // slots, placing the largest slots first. Next, perform an n^2 scan and look
+  // for disjoint slots. When you find disjoint slots, merge the smaller one
+  // into the bigger one and update the live interval. Remove the small alloca
+  // and continue.
+
+  // Sort the slots according to their size. Place unused slots at the end.
+  // Use stable sort to guarantee deterministic code generation.
+  std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
+                   SlotSizeSorter(MFI));
+
+  bool Changed = true;
+  while (Changed) {
+    Changed = false;
+    for (unsigned I = 0; I < NumSlots; ++I) {
+      if (SortedSlots[I] == -1)
+        continue;
+
+      for (unsigned J=I+1; J < NumSlots; ++J) {
+        if (SortedSlots[J] == -1)
+          continue;
+
+        int FirstSlot = SortedSlots[I];
+        int SecondSlot = SortedSlots[J];
+        LiveInterval *First = Intervals[FirstSlot];
+        LiveInterval *Second = Intervals[SecondSlot];
+        assert(!First->empty() && !Second->empty() && "Found an empty range");
+
+        // Merge disjoint slots.
+        if (!First->overlaps(*Second)) {
+          Changed = true;
+          First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
+          SlotRemap[SecondSlot] = FirstSlot;
+          SortedSlots[J] = -1;
+          DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slot #"<<
+                SecondSlot<<" together.\n");
+          unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
+                                           MFI->getObjectAlignment(SecondSlot));
+
+          assert(MFI->getObjectSize(FirstSlot) >=
+                 MFI->getObjectSize(SecondSlot) &&
+                 "Merging a small object into a larger one");
+
+          RemovedSlots += 1;
+          ReducedSize += MFI->getObjectSize(SecondSlot);
+          MFI->setObjectAlignment(FirstSlot, MaxAlignment);
+          MFI->RemoveStackObject(SecondSlot);
+        }
+      }
+    }
+  } // While changed.
+
+  // Record statistics.
+  StackSpaceSaved += ReducedSize;
+  StackSlotMerged += RemovedSlots;
+  DEBUG(dbgs()<<"Merged "<<RemovedSlots<<" slots. Saved "<<
+        ReducedSize<<" bytes\n");
+
+  // Scan the entire function and update all machine operands that use frame
+  // indices to use the remapped frame index.
+  expungeSlotMap(SlotRemap, NumSlots);
+  remapInstructions(SlotRemap);
+
+  // Release the intervals.
+ for (unsigned I = 0; I < NumSlots; ++I) { + delete Intervals[I]; + } + + return removeAllMarkers(); +} diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp new file mode 100644 index 0000000..fbef347 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -0,0 +1,370 @@ +//===-- StackProtector.cpp - Stack Protector Insertion --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass inserts stack protectors into functions which need them. A variable +// with a random value in it is stored onto the stack before the local variables +// are allocated. Upon exiting the block, the stored value is checked. If it's +// changed, then there was some sort of violation and the program aborts. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "stack-protector" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +STATISTIC(NumFunProtected, "Number of functions protected"); +STATISTIC(NumAddrTaken, "Number of local variables that have their address" + " taken."); + +namespace { + class StackProtector : public FunctionPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// target type sizes. + const TargetLoweringBase *TLI; + + Function *F; + Module *M; + + DominatorTree *DT; + + /// VisitedPHIs - The set of PHI nodes visited when determining + /// if a variable's reference has been taken. This set + /// is maintained to ensure we don't visit the same PHI node multiple + /// times. + SmallPtrSet<const PHINode*, 16> VisitedPHIs; + + /// InsertStackProtectors - Insert code into the prologue and epilogue of + /// the function. + /// + /// - The prologue code loads and stores the stack guard onto the stack. + /// - The epilogue checks the value stored in the prologue against the + /// original value. It calls __stack_chk_fail if they differ. + bool InsertStackProtectors(); + + /// CreateFailBB - Create a basic block to jump to when the stack protector + /// check fails. + BasicBlock *CreateFailBB(); + + /// ContainsProtectableArray - Check whether the type either is an array or + /// contains an array of sufficient size so that we need stack protectors + /// for it. + bool ContainsProtectableArray(Type *Ty, bool Strong = false, + bool InStruct = false) const; + + /// \brief Check whether a stack allocation has its address taken. + bool HasAddressTaken(const Instruction *AI); + + /// RequiresStackProtector - Check whether or not this function needs a + /// stack protector based upon the stack protector level. + bool RequiresStackProtector(); + public: + static char ID; // Pass identification, replacement for typeid. 
+ StackProtector() : FunctionPass(ID), TLI(0) { + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } + StackProtector(const TargetLoweringBase *tli) + : FunctionPass(ID), TLI(tli) { + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + } + + virtual bool runOnFunction(Function &Fn); + }; +} // end anonymous namespace + +char StackProtector::ID = 0; +INITIALIZE_PASS(StackProtector, "stack-protector", + "Insert stack protectors", false, false) + +FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) { + return new StackProtector(tli); +} + +bool StackProtector::runOnFunction(Function &Fn) { + F = &Fn; + M = F->getParent(); + DT = getAnalysisIfAvailable<DominatorTree>(); + + if (!RequiresStackProtector()) return false; + + ++NumFunProtected; + return InsertStackProtectors(); +} + +/// ContainsProtectableArray - Check whether the type either is an array or +/// contains a char array of sufficient size so that we need stack protectors +/// for it. +bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong, + bool InStruct) const { + if (!Ty) return false; + if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + // In strong mode any array, regardless of type and size, triggers a + // protector + if (Strong) + return true; + const TargetMachine &TM = TLI->getTargetMachine(); + if (!AT->getElementType()->isIntegerTy(8)) { + Triple Trip(TM.getTargetTriple()); + + // If we're on a non-Darwin platform or we're inside of a structure, don't + // add stack protectors unless the array is a character array. + if (InStruct || !Trip.isOSDarwin()) + return false; + } + + // If an array has more than SSPBufferSize bytes of allocated space, then we + // emit stack protectors. + if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) + return true; + } + + const StructType *ST = dyn_cast<StructType>(Ty); + if (!ST) return false; + + for (StructType::element_iterator I = ST->element_begin(), + E = ST->element_end(); I != E; ++I) + if (ContainsProtectableArray(*I, Strong, true)) + return true; + + return false; +} + +bool StackProtector::HasAddressTaken(const Instruction *AI) { + for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { + const User *U = *UI; + if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (AI == SI->getValueOperand()) + return true; + } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) { + if (AI == SI->getOperand(0)) + return true; + } else if (isa<CallInst>(U)) { + return true; + } else if (isa<InvokeInst>(U)) { + return true; + } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) { + if (HasAddressTaken(SI)) + return true; + } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { + // Keep track of what PHI nodes we have already visited to ensure + // they are only visited once. + if (VisitedPHIs.insert(PN)) + if (HasAddressTaken(PN)) + return true; + } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + if (HasAddressTaken(GEP)) + return true; + } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) { + if (HasAddressTaken(BI)) + return true; + } + } + return false; +} + +/// \brief Check whether or not this function needs a stack protector based +/// upon the stack protector level. +/// +/// We use two heuristics: a standard (ssp) and strong (sspstrong). 
+/// The standard heuristic adds a guard variable to functions that
+/// call alloca with either a variable size or a size >= SSPBufferSize,
+/// functions with character buffers larger than SSPBufferSize, and functions
+/// with aggregates containing character buffers larger than SSPBufferSize. The
+/// strong heuristic adds a guard variable to functions that call alloca
+/// regardless of size, functions with any buffer regardless of type and size,
+/// functions with aggregates that contain any buffer regardless of type and
+/// size, and functions that contain stack-based variables that have had their
+/// address taken.
+bool StackProtector::RequiresStackProtector() {
+  bool Strong = false;
+  if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                      Attribute::StackProtectReq))
+    return true;
+  else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                           Attribute::StackProtectStrong))
+    Strong = true;
+  else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                            Attribute::StackProtect))
+    return false;
+
+  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+    BasicBlock *BB = I;
+
+    for (BasicBlock::iterator
+         II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+        if (AI->isArrayAllocation()) {
+          // SSP-Strong: Enable protectors for any call to alloca, regardless
+          // of size.
+          if (Strong)
+            return true;
+
+          if (const ConstantInt *CI =
+               dyn_cast<ConstantInt>(AI->getArraySize())) {
+            unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize;
+            if (CI->getLimitedValue(BufferSize) >= BufferSize)
+              // A call to alloca with size >= SSPBufferSize requires
+              // stack protectors.
+              return true;
+          } else // A call to alloca with a variable size requires protectors.
+            return true;
+        }
+
+        if (ContainsProtectableArray(AI->getAllocatedType(), Strong))
+          return true;
+
+        if (Strong && HasAddressTaken(AI)) {
+          ++NumAddrTaken;
+          return true;
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+/// - The prologue code loads and stores the stack guard onto the stack.
+/// - The epilogue checks the value stored in the prologue against the original
+///   value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+  BasicBlock *FailBB = 0;       // The basic block to jump to if check fails.
+  BasicBlock *FailBBDom = 0;    // FailBB's dominator.
+  AllocaInst *AI = 0;           // Place on stack that stores the stack guard.
+  Value *StackGuardVar = 0;     // The stack guard variable.
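+
+  // Editor's note: concrete cases for RequiresStackProtector above, assuming
+  // the default SSPBufferSize of 8 and a non-Darwin target (illustrative
+  // only, not from this file):
+  //   char big[16];   // ssp: protected (char array, 16 >= SSPBufferSize)
+  //   char tiny[4];   // ssp: not protected; sspstrong: protected (any array)
+  //   int  nums[64];  // ssp: not protected (not a char array); sspstrong: yes
+  //   alloca(n);      // variable-sized alloca: protected under both
+  //   int x; f(&x);   // address taken: protected only under sspstrong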
+ + for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { + BasicBlock *BB = I++; + ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); + if (!RI) continue; + + if (!FailBB) { + // Insert code into the entry block that stores the __stack_chk_guard + // variable onto the stack: + // + // entry: + // StackGuardSlot = alloca i8* + // StackGuard = load __stack_chk_guard + // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) + // + PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + unsigned AddressSpace, Offset; + if (TLI->getStackCookieLocation(AddressSpace, Offset)) { + Constant *OffsetVal = + ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); + + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, + PointerType::get(PtrTy, AddressSpace)); + } else { + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + } + + BasicBlock &Entry = F->getEntryBlock(); + Instruction *InsPt = &Entry.front(); + + AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); + LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); + + Value *Args[] = { LI, AI }; + CallInst:: + Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), + Args, "", InsPt); + + // Create the basic block to jump to when the guard check fails. + FailBB = CreateFailBB(); + } + + // For each block with a return instruction, convert this: + // + // return: + // ... + // ret ... + // + // into this: + // + // return: + // ... + // %1 = load __stack_chk_guard + // %2 = load StackGuardSlot + // %3 = cmp i1 %1, %2 + // br i1 %3, label %SP_return, label %CallStackCheckFailBlk + // + // SP_return: + // ret ... + // + // CallStackCheckFailBlk: + // call void @__stack_chk_fail() + // unreachable + + // Split the basic block before the return instruction. + BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + + if (DT && DT->isReachableFromEntry(BB)) { + DT->addNewBlock(NewBB, BB); + FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB; + } + + // Remove default branch instruction to the new BB. + BB->getTerminator()->eraseFromParent(); + + // Move the newly created basic block to the point right after the old basic + // block so that it's in the "fall through" position. + NewBB->moveAfter(BB); + + // Generate the stack protector instructions in the old basic block. + LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB); + LoadInst *LI2 = new LoadInst(AI, "", true, BB); + ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, ""); + BranchInst::Create(NewBB, FailBB, Cmp, BB); + } + + // Return if we didn't modify any basic blocks. I.e., there are no return + // statements in the function. + if (!FailBB) return false; + + if (DT && FailBBDom) + DT->addNewBlock(FailBB, FailBBDom); + + return true; +} + +/// CreateFailBB - Create a basic block to jump to when the stack protector +/// check fails. 
+BasicBlock *StackProtector::CreateFailBB() {
+  BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
+                                          "CallStackCheckFailBlk", F);
+  Constant *StackChkFail =
+    M->getOrInsertFunction("__stack_chk_fail",
+                           Type::getVoidTy(F->getContext()), NULL);
+  CallInst::Create(StackChkFail, "", FailBB);
+  new UnreachableInst(F->getContext(), FailBB);
+  return FailBB;
+}
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 0000000..f951561
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,439 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackslotcoloring"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+               cl::init(false), cl::Hidden,
+               cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+  class StackSlotColoring : public MachineFunctionPass {
+    LiveStacks* LS;
+    MachineFrameInfo *MFI;
+    const TargetInstrInfo *TII;
+    const MachineLoopInfo *loopInfo;
+
+    // SSIntervals - Spill slot intervals.
+    std::vector<LiveInterval*> SSIntervals;
+
+    // SSRefs - Keep a list of frame index references for each spill slot.
+    SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+    // OrigAlignments - Alignments of stack objects before coloring.
+    SmallVector<unsigned, 16> OrigAlignments;
+
+    // OrigSizes - Sizes of stack objects before coloring.
+    SmallVector<unsigned, 16> OrigSizes;
+
+    // AllColors - If index is set, it's a spill slot, i.e. color.
+    // FIXME: This assumes PEI locates spill slots with smaller indices
+    // closest to the stack pointer / frame pointer. Therefore, smaller
+    // index == better color.
+    BitVector AllColors;
+
+    // NextColor - Next "color" that's not yet used.
+    int NextColor;
+
+    // UsedColors - "Colors" that have been assigned.
+    BitVector UsedColors;
+
+    // Assignments - Color to intervals mapping.
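+    // For example (editor's note): Assignments[C] holds every live interval
+    // assigned to color (stack slot) C, so the overlap test for a candidate
+    // color only has to scan the intervals already placed there.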
+    SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+
+  public:
+    static char ID; // Pass identification
+    StackSlotColoring() :
+      MachineFunctionPass(ID), NextColor(-1) {
+        initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+      }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<SlotIndexes>();
+      AU.addPreserved<SlotIndexes>();
+      AU.addRequired<LiveStacks>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  private:
+    void InitializeSlots();
+    void ScanForSpillSlotRefs(MachineFunction &MF);
+    bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+    int ColorSlot(LiveInterval *li);
+    bool ColorSlots(MachineFunction &MF);
+    void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+                            MachineFunction &MF);
+    bool RemoveDeadStores(MachineBasicBlock* MBB);
+  };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+char &llvm::StackSlotColoringID = StackSlotColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+                "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+                "Stack Slot Coloring", false, false)
+
+namespace {
+  // IntervalSorter - Comparison predicate that sorts live intervals by
+  // their weight.
+  struct IntervalSorter {
+    bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+      return LHS->weight > RHS->weight;
+    }
+  };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+  SSRefs.resize(MFI->getObjectIndexEnd());
+
+  // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock *MBB = &*MBBI;
+    unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+    for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+         MII != EE; ++MII) {
+      MachineInstr *MI = &*MII;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isFI())
+          continue;
+        int FI = MO.getIndex();
+        if (FI < 0)
+          continue;
+        if (!LS->hasInterval(FI))
+          continue;
+        LiveInterval &li = LS->getInterval(FI);
+        if (!MI->isDebugValue())
+          li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+        SSRefs[FI].push_back(MI);
+      }
+    }
+  }
+}
+
+/// InitializeSlots - Process all spill stack slot live intervals and add them
+/// to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+  int LastFI = MFI->getObjectIndexEnd();
+  OrigAlignments.resize(LastFI);
+  OrigSizes.resize(LastFI);
+  AllColors.resize(LastFI);
+  UsedColors.resize(LastFI);
+  Assignments.resize(LastFI);
+
+  // Gather all spill slots into a list.
+  DEBUG(dbgs() << "Spill slot intervals:\n");
+  for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
+    LiveInterval &li = i->second;
+    DEBUG(li.dump());
+    int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
+    if (MFI->isDeadObjectIndex(FI))
+      continue;
+    SSIntervals.push_back(&li);
+    OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+    OrigSizes[FI] = MFI->getObjectSize(FI);
+    AllColors.set(FI);
+  }
+  DEBUG(dbgs() << '\n');
+
+  // Sort them by weight.
+  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+  // Get first "color".
+  NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+  const SmallVector<LiveInterval*,4> &OtherLIs = Assignments[Color];
+  for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+    LiveInterval *OtherLI = OtherLIs[i];
+    if (OtherLI->overlaps(*li))
+      return true;
+  }
+  return false;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+  int Color = -1;
+  bool Share = false;
+  if (!DisableSharing) {
+    // Check if it's possible to reuse any of the used colors.
+    Color = UsedColors.find_first();
+    while (Color != -1) {
+      if (!OverlapWithAssignments(li, Color)) {
+        Share = true;
+        ++NumEliminated;
+        break;
+      }
+      Color = UsedColors.find_next(Color);
+    }
+  }
+
+  // Assign it to the first available color (assumed to be the best) if it's
+  // not possible to share a used color with other objects.
+  if (!Share) {
+    assert(NextColor != -1 && "No more spill slots?");
+    Color = NextColor;
+    UsedColors.set(Color);
+    NextColor = AllColors.find_next(NextColor);
+  }
+
+  // Record the assignment.
+  Assignments[Color].push_back(li);
+  int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+  DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+
+  // Change size and alignment of the allocated slot. If there are multiple
+  // objects sharing the same slot, then make sure the size and alignment
+  // are large enough for all.
+  unsigned Align = OrigAlignments[FI];
+  if (!Share || Align > MFI->getObjectAlignment(Color))
+    MFI->setObjectAlignment(Color, Align);
+  int64_t Size = OrigSizes[FI];
+  if (!Share || Size > MFI->getObjectSize(Color))
+    MFI->setObjectSize(Color, Size);
+  return Color;
+}
+
+/// ColorSlots - Color all spill stack slots and rewrite all frame index
+/// machine operands in the function.
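+/// For illustration (editor's sketch with hypothetical slots): if fi#0, fi#1
+/// and fi#2 have pairwise-disjoint live intervals, all three are assigned the
+/// same color; the surviving slot's size and alignment are raised to the
+/// maximum of the three, the frame-index operands are rewritten, and the
+/// now-unused slots are removed from the frame.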
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) { + unsigned NumObjs = MFI->getObjectIndexEnd(); + SmallVector<int, 16> SlotMapping(NumObjs, -1); + SmallVector<float, 16> SlotWeights(NumObjs, 0.0); + SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs); + BitVector UsedColors(NumObjs); + + DEBUG(dbgs() << "Color spill slot intervals:\n"); + bool Changed = false; + for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { + LiveInterval *li = SSIntervals[i]; + int SS = TargetRegisterInfo::stackSlot2Index(li->reg); + int NewSS = ColorSlot(li); + assert(NewSS >= 0 && "Stack coloring failed?"); + SlotMapping[SS] = NewSS; + RevMap[NewSS].push_back(SS); + SlotWeights[NewSS] += li->weight; + UsedColors.set(NewSS); + Changed |= (SS != NewSS); + } + + DEBUG(dbgs() << "\nSpill slots after coloring:\n"); + for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { + LiveInterval *li = SSIntervals[i]; + int SS = TargetRegisterInfo::stackSlot2Index(li->reg); + li->weight = SlotWeights[SS]; + } + // Sort them by new weight. + std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); + +#ifndef NDEBUG + for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) + DEBUG(SSIntervals[i]->dump()); + DEBUG(dbgs() << '\n'); +#endif + + if (!Changed) + return false; + + // Rewrite all MO_FrameIndex operands. + SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs()); + for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { + int NewFI = SlotMapping[SS]; + if (NewFI == -1 || (NewFI == (int)SS)) + continue; + + SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS]; + for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) + RewriteInstruction(RefMIs[i], SS, NewFI, MF); + } + + // Delete unused stack slots. + while (NextColor != -1) { + DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n"); + MFI->RemoveStackObject(NextColor); + NextColor = AllColors.find_next(NextColor); + } + + return true; +} + +/// RewriteInstruction - Rewrite specified instruction by replacing references +/// to old frame index with new one. +void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, + int NewFI, MachineFunction &MF) { + // Update the operands. + for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isFI()) + continue; + int FI = MO.getIndex(); + if (FI != OldFI) + continue; + MO.setIndex(NewFI); + } + + // Update the memory references. This changes the MachineMemOperands + // directly. They may be in use by multiple instructions, however all + // instructions using OldFI are being rewritten to use NewFI. + const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI); + const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) + if ((*I)->getValue() == OldSV) + (*I)->setValue(NewSV); +} + + +/// RemoveDeadStores - Scan through a basic block and look for loads followed +/// by stores. If they're both using the same stack slot, then the store is +/// definitely dead. This could obviously be much more aggressive (consider +/// pairs with instructions between them), but such extensions might have a +/// considerable compile time impact. +bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { + // FIXME: This could be much more aggressive, but we need to investigate + // the compile time impact of doing so. 
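+  //
+  // Editor's note: the pattern matched below, in schematic machine code
+  // (the syntax is invented for illustration):
+  //   %r0 = LOAD <fi#3>     ; reload from stack slot 3
+  //   STORE %r0, <fi#3>     ; immediately stored back to the same slot
+  // The store writes back the value that was just loaded, so it is dead;
+  // if the store also kills %r0, the load is deleted as well.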
+ bool changed = false; + + SmallVector<MachineInstr*, 4> toErase; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (DCELimit != -1 && (int)NumDead >= DCELimit) + break; + + MachineBasicBlock::iterator NextMI = llvm::next(I); + if (NextMI == MBB->end()) continue; + + int FirstSS, SecondSS; + unsigned LoadReg = 0; + unsigned StoreReg = 0; + if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; + if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue; + if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue; + + ++NumDead; + changed = true; + + if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) { + ++NumDead; + toErase.push_back(I); + } + + toErase.push_back(NextMI); + ++I; + } + + for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(), + E = toErase.end(); I != E; ++I) + (*I)->eraseFromParent(); + + return changed; +} + + +bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Stack Slot Coloring **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + MFI = MF.getFrameInfo(); + TII = MF.getTarget().getInstrInfo(); + LS = &getAnalysis<LiveStacks>(); + loopInfo = &getAnalysis<MachineLoopInfo>(); + + bool Changed = false; + + unsigned NumSlots = LS->getNumIntervals(); + if (NumSlots == 0) + // Nothing to do! + return false; + + // If there are calls to setjmp or sigsetjmp, don't perform stack slot + // coloring. The stack could be modified before the longjmp is executed, + // resulting in the wrong value being used afterwards. (See + // <rdar://problem/8007500>.) + if (MF.exposesReturnsTwice()) + return false; + + // Gather spill slot references + ScanForSpillSlotRefs(MF); + InitializeSlots(); + Changed = ColorSlots(MF); + + NextColor = -1; + SSIntervals.clear(); + for (unsigned i = 0, e = SSRefs.size(); i != e; ++i) + SSRefs[i].clear(); + SSRefs.clear(); + OrigAlignments.clear(); + OrigSizes.clear(); + AllColors.clear(); + UsedColors.clear(); + for (unsigned i = 0, e = Assignments.size(); i != e; ++i) + Assignments[i].clear(); + Assignments.clear(); + + if (Changed) { + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= RemoveDeadStores(I); + } + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp new file mode 100644 index 0000000..b337c53 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp @@ -0,0 +1,825 @@ +//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass eliminates PHI instructions by aggressively coalescing the copies +// that would be inserted by a naive algorithm and only inserting the copies +// that are necessary. The coalescing technique initially assumes that all +// registers appearing in a PHI instruction do not interfere. It then eliminates +// proven interferences, using dominators to only perform a linear number of +// interference tests instead of the quadratic number of interference tests +// that this would naively require. This is a technique derived from: +// +// Budimlic, et al. Fast copy coalescing and live-range identification. 
+// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+// PLDI '02. ACM, New York, NY, 25-32.
+//
+// The original implementation constructs a data structure they call a
+// dominance forest for this purpose. The dominance forest was shown to be
+// unnecessary, as it is possible to emulate the creation and traversal of a
+// dominance forest by directly using the dominator tree, rather than actually
+// constructing the dominance forest. This technique is explained in:
+//
+// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
+// Quality and Efficiency,
+// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
+// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
+// CGO '09. IEEE, Washington, DC, 114-125.
+//
+// Careful implementation allows for all of the dominance forest interference
+// checks to be performed at once in a single depth-first traversal of the
+// dominator tree, which is what is implemented here.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "llvm/CodeGen/Passes.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+namespace {
+  class StrongPHIElimination : public MachineFunctionPass {
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    StrongPHIElimination() : MachineFunctionPass(ID) {
+      initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage&) const;
+    bool runOnMachineFunction(MachineFunction&);
+
+  private:
+    /// This struct represents a single node in the union-find data structure
+    /// representing the variable congruence classes. There is one difference
+    /// from a normal union-find data structure. We steal two bits from the
+    /// parent pointer. One of these bits is used to represent whether the
+    /// register itself has been isolated, and the other is used to represent
+    /// whether the PHI with that register as its destination has been
+    /// isolated.
+    ///
+    /// Note that this leads to the strange situation where the leader of a
+    /// congruence class may no longer logically be a member, due to being
+    /// isolated.
+    struct Node {
+      enum Flags {
+        kRegisterIsolatedFlag = 1,
+        kPHIIsolatedFlag = 2
+      };
+      Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
+
+      Node *getLeader();
+
+      PointerIntPair<Node*, 2> parent;
+      unsigned value;
+      unsigned rank;
+    };
+
+    /// Add a register in a new congruence class containing only itself.
+    void addReg(unsigned);
+
+    /// Join the congruence classes of two registers. This function is biased
+    /// towards the left argument, i.e. after
+    ///
+    /// addReg(r2);
+    /// unionRegs(r1, r2);
+    ///
+    /// the leader of the unioned congruence class is the same as the leader of
+    /// r1's congruence class prior to the union. This is actually relied upon
+    /// in the copy insertion code.
+    void unionRegs(unsigned, unsigned);
+
+    /// Get the color of a register. The color is 0 if the register has been
+    /// isolated.
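+    /// For example (editor's note): after addReg(r1), addReg(r2) and
+    /// unionRegs(r1, r2), getRegColor(r1) == getRegColor(r2) != 0, while
+    /// after isolateReg(r2), getRegColor(r2) == 0 and r1 keeps the color of
+    /// the class.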
+ unsigned getRegColor(unsigned); + + // Isolate a register. + void isolateReg(unsigned); + + /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been + /// isolated. Otherwise, it is the original color of its destination and + /// all of its operands (before they were isolated, if they were). + unsigned getPHIColor(MachineInstr*); + + /// Isolate a PHI. + void isolatePHI(MachineInstr*); + + /// Traverses a basic block, splitting any interferences found between + /// registers in the same congruence class. It takes two DenseMaps as + /// arguments that it also updates: CurrentDominatingParent, which maps + /// a color to the register in that congruence class whose definition was + /// most recently seen, and ImmediateDominatingParent, which maps a register + /// to the register in the same congruence class that most immediately + /// dominates it. + /// + /// This function assumes that it is being called in a depth-first traversal + /// of the dominator tree. + void SplitInterferencesForBasicBlock( + MachineBasicBlock&, + DenseMap<unsigned, unsigned> &CurrentDominatingParent, + DenseMap<unsigned, unsigned> &ImmediateDominatingParent); + + // Lowers a PHI instruction, inserting copies of the source and destination + // registers as necessary. + void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*); + + // Merges the live interval of Reg into NewReg and renames Reg to NewReg + // everywhere that Reg appears. Requires Reg and NewReg to have non- + // overlapping lifetimes. + void MergeLIsAndRename(unsigned Reg, unsigned NewReg); + + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + MachineDominatorTree *DT; + LiveIntervals *LI; + + BumpPtrAllocator Allocator; + + DenseMap<unsigned, Node*> RegNodeMap; + + // Maps a basic block to a list of its defs of registers that appear as PHI + // sources. + DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs; + + // Maps a color to a pair of a MachineInstr* and a virtual register, which + // is the operand of that PHI corresponding to the current basic block. + DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor; + + // FIXME: Can these two data structures be combined? Would a std::multimap + // be any better? + + // Stores pairs of predecessor basic blocks and the source registers of + // inserted copy instructions. + typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet; + SrcCopySet InsertedSrcCopySet; + + // Maps pairs of predecessor basic blocks and colors to their defining copy + // instructions. + typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*> + SrcCopyMap; + SrcCopyMap InsertedSrcCopyMap; + + // Maps inserted destination copy registers to their defining copy + // instructions. 
+ typedef DenseMap<unsigned, MachineInstr*> DestCopyMap; + DestCopyMap InsertedDestCopies; + }; + + struct MIIndexCompare { + MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { } + + bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { + return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS); + } + + LiveIntervals *LI; + }; +} // namespace + +STATISTIC(NumPHIsLowered, "Number of PHIs lowered"); +STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted"); +STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted"); + +char StrongPHIElimination::ID = 0; +INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination", + "Eliminate PHI nodes for register allocation, intelligently", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination", + "Eliminate PHI nodes for register allocation, intelligently", false, false) + +char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; + +void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { + // FIXME: This only needs to check from the first terminator, as only the + // first terminator can use a virtual register. + for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) { + assert (RI != MBB->rend()); + MachineInstr *MI = &*RI; + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + MachineOperand &MO = *OI; + if (MO.isReg() && MO.isUse() && MO.getReg() == Reg) + return &MO; + } + } +} + +bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { + MRI = &MF.getRegInfo(); + TII = MF.getTarget().getInstrInfo(); + DT = &getAnalysis<MachineDominatorTree>(); + LI = &getAnalysis<LiveIntervals>(); + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + unsigned DestReg = BBI->getOperand(0).getReg(); + addReg(DestReg); + PHISrcDefs[I].push_back(BBI); + + for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { + MachineOperand &SrcMO = BBI->getOperand(i); + unsigned SrcReg = SrcMO.getReg(); + addReg(SrcReg); + unionRegs(DestReg, SrcReg); + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI) + PHISrcDefs[DefMI->getParent()].push_back(DefMI); + } + } + } + + // Perform a depth-first traversal of the dominator tree, splitting + // interferences amongst PHI-congruence classes. + DenseMap<unsigned, unsigned> CurrentDominatingParent; + DenseMap<unsigned, unsigned> ImmediateDominatingParent; + for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()), + DE = df_end(DT->getRootNode()); DI != DE; ++DI) { + SplitInterferencesForBasicBlock(*DI->getBlock(), + CurrentDominatingParent, + ImmediateDominatingParent); + } + + // Insert copies for all PHI source and destination registers. 
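+  // To illustrate with hypothetical virtual registers: for a PHI
+  //   %d = PHI %a, <BB#1>, %b, <BB#2>
+  // the first loop above performed addReg(%d); addReg(%a); unionRegs(%d, %a);
+  // addReg(%b); unionRegs(%d, %b); so %d, %a and %b started out in one
+  // congruence class, and interference splitting may since have isolated
+  // some of them.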
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI) {
+      InsertCopiesForPHI(BBI, I);
+    }
+  }
+
+  // FIXME: Preserve the equivalence classes during copy insertion and use
+  // the preserved equivalence classes instead of recomputing them.
+  RegNodeMap.clear();
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI) {
+      unsigned DestReg = BBI->getOperand(0).getReg();
+      addReg(DestReg);
+
+      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+        unsigned SrcReg = BBI->getOperand(i).getReg();
+        addReg(SrcReg);
+        unionRegs(DestReg, SrcReg);
+      }
+    }
+  }
+
+  DenseMap<unsigned, unsigned> RegRenamingMap;
+  bool Changed = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+    while (BBI != BBE && BBI->isPHI()) {
+      MachineInstr *PHI = BBI;
+
+      assert(PHI->getNumOperands() > 0);
+
+      unsigned SrcReg = PHI->getOperand(1).getReg();
+      unsigned SrcColor = getRegColor(SrcReg);
+      unsigned NewReg = RegRenamingMap[SrcColor];
+      if (!NewReg) {
+        NewReg = SrcReg;
+        RegRenamingMap[SrcColor] = SrcReg;
+      }
+      MergeLIsAndRename(SrcReg, NewReg);
+
+      unsigned DestReg = PHI->getOperand(0).getReg();
+      if (!InsertedDestCopies.count(DestReg))
+        MergeLIsAndRename(DestReg, NewReg);
+
+      for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
+        unsigned SrcReg = PHI->getOperand(i).getReg();
+        MergeLIsAndRename(SrcReg, NewReg);
+      }
+
+      ++BBI;
+      LI->RemoveMachineInstrFromMaps(PHI);
+      PHI->eraseFromParent();
+      Changed = true;
+    }
+  }
+
+  // Due to the insertion of copies to split live ranges, the live intervals are
+  // guaranteed to not overlap, except in one case: an original PHI source and a
+  // PHI destination copy. In this case, they have the same value and thus don't
+  // truly intersect, so we merge them into the value live at that point.
+  // FIXME: Is there some better way we can handle this?
+  for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
+       E = InsertedDestCopies.end(); I != E; ++I) {
+    unsigned DestReg = I->first;
+    unsigned DestColor = getRegColor(DestReg);
+    unsigned NewReg = RegRenamingMap[DestColor];
+
+    LiveInterval &DestLI = LI->getInterval(DestReg);
+    LiveInterval &NewLI = LI->getInterval(NewReg);
+
+    assert(DestLI.ranges.size() == 1
+           && "PHI destination copy's live interval should be a single live "
+              "range from the beginning of the BB to the copy instruction.");
+    LiveRange *DestLR = DestLI.begin();
+    VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
+    if (!NewVNI) {
+      NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
+      MachineInstr *CopyInstr = I->second;
+      CopyInstr->getOperand(1).setIsKill(true);
+    }
+
+    LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
+    NewLI.addRange(NewLR);
+
+    LI->removeInterval(DestReg);
+    MRI->replaceRegWith(DestReg, NewReg);
+  }
+
+  // Adjust the live intervals of all PHI source registers to handle the case
+  // where the PHIs in successor blocks were the only later uses of the source
+  // register.
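+  // For example (hypothetical register %s), if the only use of %s after its
+  // definition was a PHI in a successor block, the copy inserted for that
+  // PHI is now the last real use of %s in this block; the loop below trims
+  // %s's interval to end at that use and marks it as a kill.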
+ for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(), + E = InsertedSrcCopySet.end(); I != E; ++I) { + MachineBasicBlock *MBB = I->first; + unsigned SrcReg = I->second; + if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)]) + SrcReg = RenamedRegister; + + LiveInterval &SrcLI = LI->getInterval(SrcReg); + + bool isLiveOut = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) { + isLiveOut = true; + break; + } + } + + if (isLiveOut) + continue; + + MachineOperand *LastUse = findLastUse(MBB, SrcReg); + assert(LastUse); + SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); + SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB)); + LastUse->setIsKill(true); + } + + Allocator.Reset(); + RegNodeMap.clear(); + PHISrcDefs.clear(); + InsertedSrcCopySet.clear(); + InsertedSrcCopyMap.clear(); + InsertedDestCopies.clear(); + + return Changed; +} + +void StrongPHIElimination::addReg(unsigned Reg) { + Node *&N = RegNodeMap[Reg]; + if (!N) + N = new (Allocator) Node(Reg); +} + +StrongPHIElimination::Node* +StrongPHIElimination::Node::getLeader() { + Node *N = this; + Node *Parent = parent.getPointer(); + Node *Grandparent = Parent->parent.getPointer(); + + while (Parent != Grandparent) { + N->parent.setPointer(Grandparent); + N = Grandparent; + Parent = Parent->parent.getPointer(); + Grandparent = Parent->parent.getPointer(); + } + + return Parent; +} + +unsigned StrongPHIElimination::getRegColor(unsigned Reg) { + DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg); + if (RI == RegNodeMap.end()) + return 0; + Node *Node = RI->second; + if (Node->parent.getInt() & Node::kRegisterIsolatedFlag) + return 0; + return Node->getLeader()->value; +} + +void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) { + Node *Node1 = RegNodeMap[Reg1]->getLeader(); + Node *Node2 = RegNodeMap[Reg2]->getLeader(); + + if (Node1->rank > Node2->rank) { + Node2->parent.setPointer(Node1->getLeader()); + } else if (Node1->rank < Node2->rank) { + Node1->parent.setPointer(Node2->getLeader()); + } else if (Node1 != Node2) { + Node2->parent.setPointer(Node1->getLeader()); + Node1->rank++; + } +} + +void StrongPHIElimination::isolateReg(unsigned Reg) { + Node *Node = RegNodeMap[Reg]; + Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag); +} + +unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) { + assert(PHI->isPHI()); + + unsigned DestReg = PHI->getOperand(0).getReg(); + Node *DestNode = RegNodeMap[DestReg]; + if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag) + return 0; + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg()); + if (SrcColor) + return SrcColor; + } + return 0; +} + +void StrongPHIElimination::isolatePHI(MachineInstr *PHI) { + assert(PHI->isPHI()); + Node *Node = RegNodeMap[PHI->getOperand(0).getReg()]; + Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag); +} + +/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any +/// interferences found between registers in the same congruence class. It +/// takes two DenseMaps as arguments that it also updates: +/// +/// 1) CurrentDominatingParent, which maps a color to the register in that +/// congruence class whose definition was most recently seen. 
+///
+/// 2) ImmediateDominatingParent, which maps a register to the register in the
+/// same congruence class that most immediately dominates it.
+///
+/// This function assumes that it is being called in a depth-first traversal
+/// of the dominator tree.
+///
+/// The algorithm used here is a generalization of the dominance-based SSA test
+/// for two variables. If there are variables a_1, ..., a_n such that
+///
+/// def(a_1) dom ... dom def(a_n),
+///
+/// then we can test for an interference between any two a_i by only using O(n)
+/// interference tests between pairs of variables. If i < j and a_i and a_j
+/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_{i+1}).
+/// Thus, in order to test for an interference involving a_i, we need only check
+/// for a potential interference with a_{i+1}.
+///
+/// This method can be generalized to arbitrary sets of variables by performing
+/// a depth-first traversal of the dominator tree. As we traverse down a branch
+/// of the dominator tree, we keep track of the current dominating variable and
+/// only perform an interference test with that variable. However, when we go to
+/// another branch of the dominator tree, the definition of the current dominating
+/// variable may no longer dominate the current block. In order to correct this,
+/// we need to use a stack of past choices of the current dominating variable
+/// and pop from this stack until we find a variable whose definition actually
+/// dominates the current block.
+///
+/// There will be one push on this stack for each variable that has become the
+/// current dominating variable, so instead of using an explicit stack we can
+/// simply associate the previous choice for a current dominating variable with
+/// the new choice. This works better in our implementation, where we test for
+/// interference in multiple distinct sets at once.
+void
+StrongPHIElimination::SplitInterferencesForBasicBlock(
+    MachineBasicBlock &MBB,
+    DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+    DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
+  // Sort defs by their order in the original basic block, as the code below
+  // assumes that it is processing definitions in dominance order.
+  std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
+  std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
+
+  for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
+       BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
+    for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
+         E = (*BBI)->operands_end(); I != E; ++I) {
+      const MachineOperand &MO = *I;
+
+      // FIXME: This would be faster if it were possible to bail out of checking
+      // an instruction's operands after the explicit defs, but this is incorrect
+      // for variadic instructions, which may appear before register allocation
+      // in the future.
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+
+      unsigned DestReg = MO.getReg();
+      if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
+        continue;
+
+      // If the virtual register being defined is not used in any PHI or has
+      // already been isolated, then there are no more interferences to check.
+      unsigned DestColor = getRegColor(DestReg);
+      if (!DestColor)
+        continue;
+
+      // The input to this pass sometimes is not in SSA form in every basic
+      // block, as some virtual registers have redefinitions.
We could eliminate + // this by fixing the passes that generate the non-SSA code, or we could + // handle it here by tracking defining machine instructions rather than + // virtual registers. For now, we just handle the situation conservatively + // in a way that will possibly lead to false interferences. + unsigned &CurrentParent = CurrentDominatingParent[DestColor]; + unsigned NewParent = CurrentParent; + if (NewParent == DestReg) + continue; + + // Pop registers from the stack represented by ImmediateDominatingParent + // until we find a parent that dominates the current instruction. + while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI) + || !getRegColor(NewParent))) + NewParent = ImmediateDominatingParent[NewParent]; + + // If NewParent is nonzero, then its definition dominates the current + // instruction, so it is only necessary to check for the liveness of + // NewParent in order to check for an interference. + if (NewParent + && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) { + // If there is an interference, always isolate the new register. This + // could be improved by using a heuristic that decides which of the two + // registers to isolate. + isolateReg(DestReg); + CurrentParent = NewParent; + } else { + // If there is no interference, update ImmediateDominatingParent and set + // the CurrentDominatingParent for this color to the current register. + ImmediateDominatingParent[DestReg] = NewParent; + CurrentParent = DestReg; + } + } + } + + // We now walk the PHIs in successor blocks and check for interferences. This + // is necessary because the use of a PHI's operands are logically contained in + // the predecessor block. The def of a PHI's destination register is processed + // along with the other defs in a basic block. + + CurrentPHIForColor.clear(); + + for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + MachineInstr *PHI = BBI; + + // If a PHI is already isolated, either by being isolated directly or + // having all of its operands isolated, ignore it. + unsigned Color = getPHIColor(PHI); + if (!Color) + continue; + + // Find the index of the PHI operand that corresponds to this basic block. + unsigned PredIndex; + for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { + if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) + break; + } + assert(PredIndex < PHI->getNumOperands()); + unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); + + // Pop registers from the stack represented by ImmediateDominatingParent + // until we find a parent that dominates the current instruction. + unsigned &CurrentParent = CurrentDominatingParent[Color]; + unsigned NewParent = CurrentParent; + while (NewParent + && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) + || !getRegColor(NewParent))) + NewParent = ImmediateDominatingParent[NewParent]; + CurrentParent = NewParent; + + // If there is an interference with a register, always isolate the + // register rather than the PHI. It is also possible to isolate the + // PHI, but that introduces copies for all of the registers involved + // in that PHI. 
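+      // A concrete sketch with hypothetical registers: if %x is the current
+      // dominating parent for this color, %x is live out of MBB, and the PHI
+      // reads a different register %y out of MBB, then %x interferes with the
+      // PHI's value, so %x is pulled out of the congruence class below.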
+ if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) + && NewParent != PredOperandReg) + isolateReg(NewParent); + + std::pair<MachineInstr*, unsigned> + &CurrentPHI = CurrentPHIForColor[Color]; + + // If two PHIs have the same operand from every shared predecessor, then + // they don't actually interfere. Otherwise, isolate the current PHI. This + // could possibly be improved, e.g. we could isolate the PHI with the + // fewest operands. + if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) + isolatePHI(PHI); + else + CurrentPHI = std::make_pair(PHI, PredOperandReg); + } + } +} + +void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, + MachineBasicBlock *MBB) { + assert(PHI->isPHI()); + ++NumPHIsLowered; + unsigned PHIColor = getPHIColor(PHI); + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + MachineOperand &SrcMO = PHI->getOperand(i); + + // If a source is defined by an implicit def, there is no need to insert a + // copy in the predecessor. + if (SrcMO.isUndef()) + continue; + + unsigned SrcReg = SrcMO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + "Machine PHI Operands must all be virtual registers!"); + + MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB(); + unsigned SrcColor = getRegColor(SrcReg); + + // If neither the PHI nor the operand were isolated, then we only need to + // set the phi-kill flag on the VNInfo at this PHI. + if (PHIColor && SrcColor == PHIColor) { + LiveInterval &SrcInterval = LI->getInterval(SrcReg); + SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); + VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); + (void)SrcVNI; + assert(SrcVNI); + continue; + } + + unsigned CopyReg = 0; + if (PHIColor) { + SrcCopyMap::const_iterator I + = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); + CopyReg + = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0; + } + + if (!CopyReg) { + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + CopyReg = MRI->createVirtualRegister(RC); + + MachineBasicBlock::iterator + CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); + unsigned SrcSubReg = SrcMO.getSubReg(); + MachineInstr *CopyInstr = BuildMI(*PredBB, + CopyInsertPoint, + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + CopyReg).addReg(SrcReg, 0, SrcSubReg); + LI->InsertMachineInstrInMaps(CopyInstr); + ++NumSrcCopiesInserted; + + // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for + // the newly added range. + LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); + InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); + + addReg(CopyReg); + if (PHIColor) { + unionRegs(PHIColor, CopyReg); + assert(getRegColor(CopyReg) != CopyReg); + } else { + PHIColor = CopyReg; + assert(getRegColor(CopyReg) == CopyReg); + } + + // Insert into map if not already there. + InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor), + CopyInstr)); + } + + SrcMO.setReg(CopyReg); + + // If SrcReg is not live beyond the PHI, trim its interval so that it is no + // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are + // processed later, but this is still correct to do at this point because we + // never rely on LiveIntervals being correct while inserting copies. + // FIXME: Should this just count uses at PHIs like the normal PHIElimination + // pass does? 
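+    // For instance, if a hypothetical source %s was live-in to MBB only to
+    // feed this PHI, the range removed below is exactly its live-in segment
+    // [MBBStart, PHIIndex], leaving %s live only up to the copy already
+    // placed in the predecessor.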
+ LiveInterval &SrcLI = LI->getInterval(SrcReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); + if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) + SrcLI.removeRange(MBBStartIndex, PHIIndex, true); + } + + unsigned DestReg = PHI->getOperand(0).getReg(); + unsigned DestColor = getRegColor(DestReg); + + if (PHIColor && DestColor == PHIColor) { + LiveInterval &DestLI = LI->getInterval(DestReg); + + // Set the phi-def flag for the VN at this PHI. + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); + assert(DestVNI); + + // Prior to PHI elimination, the live ranges of PHIs begin at their defining + // instruction. After PHI elimination, PHI instructions are replaced by VNs + // with the phi-def flag set, and the live ranges of these VNs start at the + // beginning of the basic block. + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + DestVNI->def = MBBStartIndex; + DestLI.addRange(LiveRange(MBBStartIndex, + PHIIndex.getRegSlot(), + DestVNI)); + return; + } + + const TargetRegisterClass *RC = MRI->getRegClass(DestReg); + unsigned CopyReg = MRI->createVirtualRegister(RC); + + MachineInstr *CopyInstr = BuildMI(*MBB, + MBB->SkipPHIsAndLabels(MBB->begin()), + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + DestReg).addReg(CopyReg); + LI->InsertMachineInstrInMaps(CopyInstr); + PHI->getOperand(0).setReg(CopyReg); + ++NumDestCopiesInserted; + + // Add the region from the beginning of MBB to the copy instruction to + // CopyReg's live interval, and give the VNInfo the phidef flag. + LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); + VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, + LI->getVNInfoAllocator()); + CopyLI.addRange(LiveRange(MBBStartIndex, + DestCopyIndex.getRegSlot(), + CopyVNI)); + + // Adjust DestReg's live interval to adjust for its new definition at + // CopyInstr. + LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot()); + + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); + assert(DestVNI); + DestVNI->def = DestCopyIndex.getRegSlot(); + + InsertedDestCopies[CopyReg] = CopyInstr; +} + +void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { + if (Reg == NewReg) + return; + + LiveInterval &OldLI = LI->getInterval(Reg); + LiveInterval &NewLI = LI->getInterval(NewReg); + + // Merge the live ranges of the two registers. + DenseMap<VNInfo*, VNInfo*> VNMap; + for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); + LRI != LRE; ++LRI) { + LiveRange OldLR = *LRI; + VNInfo *OldVN = OldLR.valno; + + VNInfo *&NewVN = VNMap[OldVN]; + if (!NewVN) { + NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); + VNMap[OldVN] = NewVN; + } + + LiveRange LR(OldLR.start, OldLR.end, NewVN); + NewLI.addRange(LR); + } + + // Remove the LiveInterval for the register being renamed and replace all + // of its defs and uses with the new register. 
+ LI->removeInterval(Reg); + MRI->replaceRegWith(Reg, NewReg); +} diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp new file mode 100644 index 0000000..1ec8817 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -0,0 +1,970 @@ +//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass duplicates basic blocks ending in unconditional branches into +// the tails of their predecessors. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "tailduplication" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +STATISTIC(NumTails , "Number of tails duplicated"); +STATISTIC(NumTailDups , "Number of tail duplicated blocks"); +STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); +STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); +STATISTIC(NumAddedPHIs , "Number of phis added"); + +// Heuristic for tail duplication. +static cl::opt<unsigned> +TailDuplicateSize("tail-dup-size", + cl::desc("Maximum instructions to consider tail duplicating"), + cl::init(2), cl::Hidden); + +static cl::opt<bool> +TailDupVerify("tail-dup-verify", + cl::desc("Verify sanity of PHI instructions during taildup"), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> +TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); + +typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; + +namespace { + /// TailDuplicatePass - Perform tail duplication. + class TailDuplicatePass : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineModuleInfo *MMI; + MachineRegisterInfo *MRI; + OwningPtr<RegScavenger> RS; + bool PreRegAlloc; + + // SSAUpdateVRs - A list of virtual registers for which to update SSA form. + SmallVector<unsigned, 16> SSAUpdateVRs; + + // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of + // source virtual registers. 
+ DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; + + public: + static char ID; + explicit TailDuplicatePass() : + MachineFunctionPass(ID), PreRegAlloc(false) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB); + void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, + const DenseSet<unsigned> &UsedByPhi, + bool Remove); + void DuplicateInstruction(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap<unsigned, unsigned> &LocalVRMap, + const DenseSet<unsigned> &UsedByPhi); + void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallSetVector<MachineBasicBlock*, 8> &Succs); + bool TailDuplicateBlocks(MachineFunction &MF); + bool shouldTailDuplicate(const MachineFunction &MF, + bool IsSimple, MachineBasicBlock &TailBB); + bool isSimpleBB(MachineBasicBlock *TailBB); + bool canCompletelyDuplicateBB(MachineBasicBlock &BB); + bool duplicateSimpleBB(MachineBasicBlock *TailBB, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + const DenseSet<unsigned> &RegsUsedByPhi, + SmallVector<MachineInstr*, 16> &Copies); + bool TailDuplicate(MachineBasicBlock *TailBB, + bool IsSimple, + MachineFunction &MF, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVector<MachineInstr*, 16> &Copies); + bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, + bool IsSimple, + MachineFunction &MF); + + void RemoveDeadBlock(MachineBasicBlock *MBB); + }; + + char TailDuplicatePass::ID = 0; +} + +char &llvm::TailDuplicateID = TailDuplicatePass::ID; + +INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", + false, false) + +bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + PreRegAlloc = MRI->isSSA(); + RS.reset(); + if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) + RS.reset(new RegScavenger()); + + bool MadeChange = false; + while (TailDuplicateBlocks(MF)) + MadeChange = true; + + return MadeChange; +} + +static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), + MBB->pred_end()); + MachineBasicBlock::iterator MI = MBB->begin(); + while (MI != MBB->end()) { + if (!MI->isPHI()) + break; + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + bool Found = false; + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (PHIBB == PredBB) { + Found = true; + break; + } + } + if (!Found) { + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " missing input from predecessor BB#" + << PredBB->getNumber() << '\n'; + llvm_unreachable(0); + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (CheckExtra && !Preds.count(PHIBB)) { + dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() + << ": " << *MI; 
+          dbgs() << " extra input from predecessor BB#"
+                 << PHIBB->getNumber() << '\n';
+          llvm_unreachable(0);
+        }
+        if (PHIBB->getNumber() < 0) {
+          dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+          dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+          llvm_unreachable(0);
+        }
+      }
+      ++MI;
+    }
+  }
+}
+
+/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
+bool
+TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+                                          bool IsSimple,
+                                          MachineFunction &MF) {
+  // Save the successors list.
+  SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+                                              MBB->succ_end());
+
+  SmallVector<MachineBasicBlock*, 8> TDBBs;
+  SmallVector<MachineInstr*, 16> Copies;
+  if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
+    return false;
+
+  ++NumTails;
+
+  SmallVector<MachineInstr*, 8> NewPHIs;
+  MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+  // TailBB's immediate successors are now successors of those predecessors
+  // which duplicated TailBB. Add the predecessors as sources to the PHI
+  // instructions.
+  bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+  if (PreRegAlloc)
+    UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+  // If it is dead, remove it.
+  if (isDead) {
+    NumInstrDups -= MBB->size();
+    RemoveDeadBlock(MBB);
+    ++NumDeadBlocks;
+  }
+
+  // Update SSA form.
+  if (!SSAUpdateVRs.empty()) {
+    for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+      unsigned VReg = SSAUpdateVRs[i];
+      SSAUpdate.Initialize(VReg);
+
+      // If the original definition is still around, add it as an available
+      // value.
+      MachineInstr *DefMI = MRI->getVRegDef(VReg);
+      MachineBasicBlock *DefBB = 0;
+      if (DefMI) {
+        DefBB = DefMI->getParent();
+        SSAUpdate.AddAvailableValue(DefBB, VReg);
+      }
+
+      // Add the new vregs as available values.
+      DenseMap<unsigned, AvailableValsTy>::iterator LI =
+        SSAUpdateVals.find(VReg);
+      for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+        MachineBasicBlock *SrcBB = LI->second[j].first;
+        unsigned SrcReg = LI->second[j].second;
+        SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+      }
+
+      // Rewrite uses that are outside of the original def's block.
+      MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+      while (UI != MRI->use_end()) {
+        MachineOperand &UseMO = UI.getOperand();
+        MachineInstr *UseMI = &*UI;
+        ++UI;
+        if (UseMI->isDebugValue()) {
+          // SSAUpdate can replace the use with an undef. That creates
+          // a debug instruction that is a kill.
+          // FIXME: Should it be SSAUpdate's job to delete debug instructions
+          // instead of replacing the use with undef?
+          UseMI->eraseFromParent();
+          continue;
+        }
+        if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+          continue;
+        SSAUpdate.RewriteUse(UseMO);
+      }
+    }
+
+    SSAUpdateVRs.clear();
+    SSAUpdateVals.clear();
+  }
+
+  // Eliminate some of the copies inserted by tail duplication to maintain
+  // SSA form.
+  for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+    MachineInstr *Copy = Copies[i];
+    if (!Copy->isCopy())
+      continue;
+    unsigned Dst = Copy->getOperand(0).getReg();
+    unsigned Src = Copy->getOperand(1).getReg();
+    if (MRI->hasOneNonDBGUse(Src) &&
+        MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
+      // Copy is the only use. Do trivial copy propagation here.
+      MRI->replaceRegWith(Dst, Src);
+      Copy->eraseFromParent();
+    }
+  }
+
+  if (NewPHIs.size())
+    NumAddedPHIs += NewPHIs.size();
+
+  return true;
+}
+
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through.
Tail-duplicate their instructions +/// into their predecessors to eliminate (dynamic) branches. +bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { + bool MadeChange = false; + + if (PreRegAlloc && TailDupVerify) { + DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); + VerifyPHIs(MF, true); + } + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + + if (NumTails == TailDupLimit) + break; + + bool IsSimple = isSimpleBB(MBB); + + if (!shouldTailDuplicate(MF, IsSimple, *MBB)) + continue; + + MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF); + } + + if (PreRegAlloc && TailDupVerify) + VerifyPHIs(MF, false); + + return MadeChange; +} + +static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, + const MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI->isDebugValue()) + continue; + if (UseMI->getParent() != BB) + return true; + } + return false; +} + +static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) + if (MI->getOperand(i+1).getMBB() == SrcBB) + return i; + return 0; +} + + +// Remember which registers are used by phis in this block. This is +// used to determine which registers are liveout while modifying the +// block (which is why we need to copy the information). +static void getRegsUsedByPHIs(const MachineBasicBlock &BB, + DenseSet<unsigned> *UsedByPhi) { + for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end(); + I != E; ++I) { + const MachineInstr &MI = *I; + if (!MI.isPHI()) + break; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { + unsigned SrcReg = MI.getOperand(i).getReg(); + UsedByPhi->insert(SrcReg); + } + } +} + +/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for +/// SSA update. +void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB) { + DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); + if (LI != SSAUpdateVals.end()) + LI->second.push_back(std::make_pair(BB, NewReg)); + else { + AvailableValsTy Vals; + Vals.push_back(std::make_pair(BB, NewReg)); + SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); + SSAUpdateVRs.push_back(OrigReg); + } +} + +/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. +/// Remember the source register that's contributed by PredBB and update SSA +/// update map. +void TailDuplicatePass::ProcessPHI(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, + const DenseSet<unsigned> &RegsUsedByPhi, + bool Remove) { + unsigned DefReg = MI->getOperand(0).getReg(); + unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); + assert(SrcOpIdx && "Unable to find matching PHI source?"); + unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); + + // Insert a copy from source to the end of the block. The def register is the + // available value liveout of the block. 
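+  // Sketch of the rewrite, with hypothetical registers: for
+  //   %d = PHI ..., %s, <PredBB>, ...
+  // a fresh %new is created, the pair (%new, %s) is queued so that
+  // "%new = COPY %s" can be materialized at the end of PredBB, and
+  // %d -> (%new, PredBB) is recorded for the SSA updater whenever %d is
+  // live out of TailBB or used by another PHI.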
+  unsigned NewDef = MRI->createVirtualRegister(RC);
+  Copies.push_back(std::make_pair(NewDef, SrcReg));
+  if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
+    AddSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+  if (!Remove)
+    return;
+
+  // Remove PredBB from the PHI node.
+  MI->RemoveOperand(SrcOpIdx+1);
+  MI->RemoveOperand(SrcOpIdx);
+  if (MI->getNumOperands() == 1)
+    MI->eraseFromParent();
+}
+
+/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
+/// the source operands due to earlier PHI translation.
+void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
+                                             MachineBasicBlock *TailBB,
+                                             MachineBasicBlock *PredBB,
+                                             MachineFunction &MF,
+                                             DenseMap<unsigned, unsigned> &LocalVRMap,
+                                             const DenseSet<unsigned> &UsedByPhi) {
+  MachineInstr *NewMI = TII->duplicate(MI, MF);
+  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = NewMI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    if (MO.isDef()) {
+      const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+      unsigned NewReg = MRI->createVirtualRegister(RC);
+      MO.setReg(NewReg);
+      LocalVRMap.insert(std::make_pair(Reg, NewReg));
+      if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
+        AddSSAUpdateEntry(Reg, NewReg, PredBB);
+    } else {
+      DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
+      if (VI != LocalVRMap.end()) {
+        MO.setReg(VI->second);
+        MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
+      }
+    }
+  }
+  PredBB->insert(PredBB->instr_end(), NewMI);
+}
+
+/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
+/// blocks, the successors have gained new predecessors. Update the PHI
+/// instructions in them accordingly.
+void
+TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+                                        SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                                        SmallSetVector<MachineBasicBlock*,8> &Succs) {
+  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
+       SE = Succs.end(); SI != SE; ++SI) {
+    MachineBasicBlock *SuccBB = *SI;
+    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+         II != EE; ++II) {
+      if (!II->isPHI())
+        break;
+      MachineInstrBuilder MIB(*FromBB->getParent(), II);
+      unsigned Idx = 0;
+      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+        MachineOperand &MO = II->getOperand(i+1);
+        if (MO.getMBB() == FromBB) {
+          Idx = i;
+          break;
+        }
+      }
+
+      assert(Idx != 0);
+      MachineOperand &MO0 = II->getOperand(Idx);
+      unsigned Reg = MO0.getReg();
+      if (isDead) {
+        // Folded into the previous BB.
+        // There could be duplicate phi source entries. FIXME: Should sdisel
+        // or an earlier pass have fixed this?
+        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
+          MachineOperand &MO = II->getOperand(i+1);
+          if (MO.getMBB() == FromBB) {
+            II->RemoveOperand(i+1);
+            II->RemoveOperand(i);
+          }
+        }
+      } else
+        Idx = 0;
+
+      // If Idx is set, the operands at Idx and Idx+1 must be removed.
+      // We reuse the location to avoid expensive RemoveOperand calls.
+
+      DenseMap<unsigned, AvailableValsTy>::iterator LI = SSAUpdateVals.find(Reg);
+      if (LI != SSAUpdateVals.end()) {
+        // This register is defined in the tail block.
+        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+          MachineBasicBlock *SrcBB = LI->second[j].first;
+          // If we didn't duplicate a bb into a particular predecessor, we
+          // might still have added an entry to SSAUpdateVals to correctly
+          // recompute SSA. In that case, avoid adding a dummy extra argument
+          // to this PHI.
+          if (!SrcBB->isSuccessor(SuccBB))
+            continue;
+
+          unsigned SrcReg = LI->second[j].second;
+          if (Idx != 0) {
+            II->getOperand(Idx).setReg(SrcReg);
+            II->getOperand(Idx+1).setMBB(SrcBB);
+            Idx = 0;
+          } else {
+            MIB.addReg(SrcReg).addMBB(SrcBB);
+          }
+        }
+      } else {
+        // Live in tail block, must also be live in predecessors.
+        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+          MachineBasicBlock *SrcBB = TDBBs[j];
+          if (Idx != 0) {
+            II->getOperand(Idx).setReg(Reg);
+            II->getOperand(Idx+1).setMBB(SrcBB);
+            Idx = 0;
+          } else {
+            MIB.addReg(Reg).addMBB(SrcBB);
+          }
+        }
+      }
+      if (Idx != 0) {
+        II->RemoveOperand(Idx+1);
+        II->RemoveOperand(Idx);
+      }
+    }
+  }
+}
+
+/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
+bool
+TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
+                                       bool IsSimple,
+                                       MachineBasicBlock &TailBB) {
+  // Only duplicate blocks that end with unconditional branches.
+  if (TailBB.canFallThrough())
+    return false;
+
+  // Don't try to tail-duplicate single-block loops.
+  if (TailBB.isSuccessor(&TailBB))
+    return false;
+
+  // Set the limit on the cost to duplicate. When optimizing for size,
+  // duplicate only one, because one branch instruction can be eliminated to
+  // compensate for the duplication.
+  unsigned MaxDuplicateCount;
+  if (TailDuplicateSize.getNumOccurrences() == 0 &&
+      MF.getFunction()->getAttributes().
+        hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
+    MaxDuplicateCount = 1;
+  else
+    MaxDuplicateCount = TailDuplicateSize;
+
+  // If the target has hardware branch prediction that can handle indirect
+  // branches, duplicating them can often make them predictable when there
+  // are common paths through the code. The limit needs to be high enough
+  // to allow undoing the effects of tail merging and other optimizations
+  // that rearrange the predecessors of the indirect branch.
+
+  bool HasIndirectbr = false;
+  if (!TailBB.empty())
+    HasIndirectbr = TailBB.back().isIndirectBranch();
+
+  if (HasIndirectbr && PreRegAlloc)
+    MaxDuplicateCount = 20;
+
+  // Check the instructions in the block to determine whether tail-duplication
+  // is invalid or unlikely to be profitable.
+  unsigned InstrCount = 0;
+  for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
+    // Non-duplicable things shouldn't be tail-duplicated.
+    if (I->isNotDuplicable())
+      return false;
+
+    // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+    // A return may expand into a lot more instructions (e.g. reload of callee
+    // saved registers) after PEI.
+    if (PreRegAlloc && I->isReturn())
+      return false;
+
+    // Avoid duplicating calls before register allocation. Calls present a
+    // barrier to register allocation so duplicating them may end up increasing
+    // spills.
+    if (PreRegAlloc && I->isCall())
+      return false;
+
+    if (!I->isPHI() && !I->isDebugValue())
+      InstrCount += 1;
+
+    if (InstrCount > MaxDuplicateCount)
+      return false;
+  }
+
+  if (HasIndirectbr && PreRegAlloc)
+    return true;
+
+  if (IsSimple)
+    return true;
+
+  if (!PreRegAlloc)
+    return true;
+
+  return canCompletelyDuplicateBB(TailBB);
+}
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
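+///
+/// For example (target-neutral pseudo-MI), a block of the form
+///   BB#7:
+///     DBG_VALUE ...
+///     B <BB#9>
+/// is simple: debug values are skipped, and the only remaining instruction
+/// is an unconditional branch to the block's single successor.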
+bool +TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { + if (TailBB->succ_size() != 1) + return false; + if (TailBB->pred_empty()) + return false; + MachineBasicBlock::iterator I = TailBB->begin(); + MachineBasicBlock::iterator E = TailBB->end(); + while (I != E && I->isDebugValue()) + ++I; + if (I == E) + return true; + return I->isUnconditionalBranch(); +} + +static bool +bothUsedInPHI(const MachineBasicBlock &A, + SmallPtrSet<MachineBasicBlock*, 8> SuccsB) { + for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(), + SE = A.succ_end(); SI != SE; ++SI) { + MachineBasicBlock *BB = *SI; + if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) + return true; + } + + return false; +} + +bool +TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { + SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end()); + + for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), + PE = BB.pred_end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + if (PredBB->succ_size() > 1) + return false; + + MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + return false; + + if (!PredCond.empty()) + return false; + } + return true; +} + +bool +TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + const DenseSet<unsigned> &UsedByPhi, + SmallVector<MachineInstr*, 16> &Copies) { + SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), + TailBB->succ_end()); + SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); + bool Changed = false; + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + if (PredBB->getLandingPadSuccessor()) + continue; + + if (bothUsedInPHI(*PredBB, Succs)) + continue; + + MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + + Changed = true; + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From simple Succ: " << *TailBB); + + MachineBasicBlock *NewTarget = *TailBB->succ_begin(); + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB)); + + // Make PredFBB explicit. + if (PredCond.empty()) + PredFBB = PredTBB; + + // Make fall through explicit. + if (!PredTBB) + PredTBB = NextBB; + if (!PredFBB) + PredFBB = NextBB; + + // Redirect + if (PredFBB == TailBB) + PredFBB = NewTarget; + if (PredTBB == TailBB) + PredTBB = NewTarget; + + // Make the branch unconditional if possible + if (PredTBB == PredFBB) { + PredCond.clear(); + PredFBB = NULL; + } + + // Avoid adding fall through branches. + if (PredFBB == NextBB) + PredFBB = NULL; + if (PredTBB == NextBB && PredFBB == NULL) + PredTBB = NULL; + + TII->RemoveBranch(*PredBB); + + if (PredTBB) + TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); + + PredBB->removeSuccessor(TailBB); + unsigned NumSuccessors = PredBB->succ_size(); + assert(NumSuccessors <= 1); + if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) + PredBB->addSuccessor(NewTarget); + + TDBBs.push_back(PredBB); + } + return Changed; +} + +/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each +/// of its predecessors. 
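+///
+/// For example, if blocks A and B both branch unconditionally to C, and C
+/// branches on to D, duplicating C's instructions into A and B lets both
+/// branch straight to D; C then becomes dead if nothing else targets it and
+/// is removed by the caller.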
+bool +TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, + bool IsSimple, + MachineFunction &MF, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVector<MachineInstr*, 16> &Copies) { + DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + + DenseSet<unsigned> UsedByPhi; + getRegsUsedByPHIs(*TailBB, &UsedByPhi); + + if (IsSimple) + return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); + + // Iterate through all the unique predecessors and tail-duplicate this + // block into them, if possible. Copying the list ahead of time also + // avoids trouble with the predecessor list reallocating. + bool Changed = false; + SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + assert(TailBB != PredBB && + "Single-block loop should have been rejected earlier!"); + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() > 1) + continue; + + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + if (!PredCond.empty()) + continue; + // Don't duplicate into a fall-through predecessor (at least for now). + if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + continue; + + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From Succ: " << *TailBB); + + TDBBs.push_back(PredBB); + + // Remove PredBB's unconditional branch. + TII->RemoveBranch(*PredBB); + + if (RS && !TailBB->livein_empty()) { + // Update PredBB livein. + RS->enterBasicBlock(PredBB); + if (!PredBB->empty()) + RS->forward(prior(PredBB->end())); + BitVector RegsLiveAtExit(TRI->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), + E = TailBB->livein_end(); I != E; ++I) { + if (!RegsLiveAtExit[*I]) + // If a register is previously livein to the tail but it's not live + // at the end of predecessor BB, then it should be added to its + // livein list. + PredBB->addLiveIn(*I); + } + } + + // Clone the contents of TailBB into PredBB. + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + // Use instr_iterator here to properly handle bundles, e.g. + // ARM Thumb2 IT block. + MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); + while (I != TailBB->instr_end()) { + MachineInstr *MI = &*I; + ++I; + if (MI->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); + } else { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); + } + } + MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); + } + + // Simplify + TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + + NumInstrDups += TailBB->size() - 1; // subtract one for removed branch + + // Update the CFG. 
+ PredBB->removeSuccessor(PredBB->succ_begin()); + assert(PredBB->succ_empty() && + "TailDuplicate called on block with multiple successors!"); + for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), + E = TailBB->succ_end(); I != E; ++I) + PredBB->addSuccessor(*I); + + Changed = true; + ++NumTailDups; + } + + // If TailBB was duplicated into all its predecessors except for the prior + // block, which falls through unconditionally, move the contents of this + // block into the prior block. + MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + SmallVector<MachineOperand, 4> PriorCond; + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (PrevBB->succ_size() == 1 && + !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && + PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && + !TailBB->hasAddressTaken()) { + DEBUG(dbgs() << "\nMerging into block: " << *PrevBB + << "From MBB: " << *TailBB); + if (PreRegAlloc) { + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); + if (MI->getParent()) + MI->eraseFromParent(); + } + + // Now copy the non-PHI instructions. + while (I != TailBB->end()) { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + MachineInstr *MI = &*I++; + assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); + DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); + MI->eraseFromParent(); + } + MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first) + .addReg(CopyInfos[i].second)); + } + } else { + // No PHIs to worry about, just splice the instructions over. + PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); + } + PrevBB->removeSuccessor(PrevBB->succ_begin()); + assert(PrevBB->succ_empty()); + PrevBB->transferSuccessors(TailBB); + TDBBs.push_back(PrevBB); + Changed = true; + } + + // If this is after register allocation, there are no phis to fix. + if (!PreRegAlloc) + return Changed; + + // If we made no changes so far, we are safe. + if (!Changed) + return Changed; + + + // Handle the nasty case in that we duplicated a block that is part of a loop + // into some but not all of its predecessors. For example: + // 1 -> 2 <-> 3 | + // \ | + // \---> rest | + // if we duplicate 2 into 1 but not into 3, we end up with + // 12 -> 3 <-> 2 -> rest | + // \ / | + // \----->-----/ | + // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced + // with a phi in 3 (which now dominates 2). + // What we do here is introduce a copy in 3 of the register defined by the + // phi, just like when we are duplicating 2 into 3, but we don't copy any + // real instructions or remove the 3 -> 2 edge from the phi in 2. 
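+  // In the 1 -> 2 <-> 3 example above, that means block 3 (the predecessor
+  // we did not duplicate into) receives "%new = COPY %src" for the value it
+  // contributes to the phi, giving the SSA updater an available value in 3
+  // without touching the phi or the 3 -> 2 edge itself.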
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) + continue; + + // EH edges + if (PredBB->succ_size() != 1) + continue; + + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); + } + MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); + } + } + + return Changed; +} + +/// RemoveDeadBlock - Remove the specified dead machine basic block from the +/// function, updating the CFG. +void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { + assert(MBB->pred_empty() && "MBB must be dead!"); + DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); + + // Remove all successors. + while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_end()-1); + + // Remove the block. + MBB->eraseFromParent(); +} diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp new file mode 100644 index 0000000..883e9d1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -0,0 +1,44 @@ +//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the layout of a stack frame on the target machine. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cstdlib> +using namespace llvm; + +TargetFrameLowering::~TargetFrameLowering() { +} + +/// getFrameIndexOffset - Returns the displacement from the frame register to +/// the stack frame of the specified index. This is the default implementation +/// which is overridden for some targets. +int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); +} + +int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); + + // By default, assume all frame indices are referenced via whatever + // getFrameRegister() says. The target can override this if it's doing + // something different. 
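+  // Worked example with made-up numbers: if getObjectOffset(FI) is -8,
+  // getStackSize() is 32, getOffsetOfLocalArea() is 0, and
+  // getOffsetAdjustment() is 0, then getFrameIndexOffset() above returns
+  // -8 + 32 - 0 + 0 = 24 bytes from the frame register.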
+  FrameReg = RI->getFrameRegister(MF);
+  return getFrameIndexOffset(MF, FI);
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
new file mode 100644
index 0000000..20eb918
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -0,0 +1,739 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+static cl::opt<bool> DisableHazardRecognizer(
+  "disable-sched-hazard", cl::Hidden, cl::init(false),
+  cl::desc("Disable hazard detection during preRA scheduling"));
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+                             const TargetRegisterInfo *TRI,
+                             const MachineFunction &MF) const {
+  if (OpNum >= MCID.getNumOperands())
+    return 0;
+
+  short RegClass = MCID.OpInfo[OpNum].RegClass;
+  if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+    return TRI->getPointerRegClass(MF, RegClass);
+
+  // Instructions like INSERT_SUBREG do not have fixed register classes.
+  if (RegClass < 0)
+    return 0;
+
+  // Otherwise just look it up normally.
+  return TRI->getRegClass(RegClass);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI) const {
+  llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+                                             const MCAsmInfo &MAI) const {
+
+
+  // Count the number of instructions in the asm.
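+  // For example (hypothetical MAI values): with separator ";" and comment
+  // string "#", the string "nop; nop; # done" counts as two instructions,
+  // so the estimate is 2 * MAI.getMaxInstLength() bytes; the trailing
+  // comment adds nothing.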
+  bool atInsnStart = true;
+  unsigned Length = 0;
+  for (; *Str; ++Str) {
+    if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+                                strlen(MAI.getSeparatorString())) == 0)
+      atInsnStart = true;
+    if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+      Length += MAI.getMaxInstLength();
+      atInsnStart = false;
+    }
+    if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+                               strlen(MAI.getCommentString())) == 0)
+      atInsnStart = false;
+  }
+
+  return Length;
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+                                         MachineBasicBlock *NewDest) const {
+  MachineBasicBlock *MBB = Tail->getParent();
+
+  // Remove all the old successors of MBB from the CFG.
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_begin());
+
+  // Remove all the dead instructions from the end of MBB.
+  MBB->erase(Tail, MBB->end());
+
+  // If MBB isn't immediately before NewDest, insert a branch to it.
+  if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+    InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+                 Tail->getDebugLoc());
+  MBB->addSuccessor(NewDest);
+}
+
+// commuteInstruction - The default implementation of this method just
+// exchanges the two operands returned by findCommutedOpIndices.
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
+                                                  bool NewMI) const {
+  const MCInstrDesc &MCID = MI->getDesc();
+  bool HasDef = MCID.getNumDefs();
+  if (HasDef && !MI->getOperand(0).isReg())
+    // No idea how to commute this instruction. Target should implement its
+    // own.
+    return 0;
+  unsigned Idx1, Idx2;
+  if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Don't know how to commute: " << *MI;
+    report_fatal_error(Msg.str());
+  }
+
+  assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
+         "This only knows how to commute register operands so far");
+  unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
+  unsigned Reg1 = MI->getOperand(Idx1).getReg();
+  unsigned Reg2 = MI->getOperand(Idx2).getReg();
+  unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0;
+  unsigned SubReg1 = MI->getOperand(Idx1).getSubReg();
+  unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
+  bool Reg1IsKill = MI->getOperand(Idx1).isKill();
+  bool Reg2IsKill = MI->getOperand(Idx2).isKill();
+  // If destination is tied to either of the commuted source registers, then
+  // it must be updated.
+  if (HasDef && Reg0 == Reg1 &&
+      MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
+    Reg2IsKill = false;
+    Reg0 = Reg2;
+    SubReg0 = SubReg2;
+  } else if (HasDef && Reg0 == Reg2 &&
+             MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+    Reg1IsKill = false;
+    Reg0 = Reg1;
+    SubReg0 = SubReg1;
+  }
+
+  if (NewMI) {
+    // Create a new instruction.
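+    // Note that CloneMachineInstr returns an instruction that has not been
+    // inserted into any basic block; callers passing NewMI == true are
+    // expected to insert the result (and eventually delete the original
+    // instruction) themselves.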
+    MachineFunction &MF = *MI->getParent()->getParent();
+    MI = MF.CloneMachineInstr(MI);
+  }
+
+  if (HasDef) {
+    MI->getOperand(0).setReg(Reg0);
+    MI->getOperand(0).setSubReg(SubReg0);
+  }
+  MI->getOperand(Idx2).setReg(Reg1);
+  MI->getOperand(Idx1).setReg(Reg2);
+  MI->getOperand(Idx2).setSubReg(SubReg1);
+  MI->getOperand(Idx1).setSubReg(SubReg2);
+  MI->getOperand(Idx2).setIsKill(Reg1IsKill);
+  MI->getOperand(Idx1).setIsKill(Reg2IsKill);
+  return MI;
+}
+
+/// findCommutedOpIndices - If specified MI is commutable, return the two
+/// operand indices that would swap value. Return false if the instruction
+/// is not in a form which this routine understands.
+bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
+                                            unsigned &SrcOpIdx1,
+                                            unsigned &SrcOpIdx2) const {
+  assert(!MI->isBundle() &&
+         "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
+
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isCommutable())
+    return false;
+  // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+  // is not true, then the target must implement this.
+  SrcOpIdx1 = MCID.getNumDefs();
+  SrcOpIdx2 = SrcOpIdx1 + 1;
+  if (!MI->getOperand(SrcOpIdx1).isReg() ||
+      !MI->getOperand(SrcOpIdx2).isReg())
+    // No idea.
+    return false;
+  return true;
+}
+
+
+bool
+TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+  if (!MI->isTerminator()) return false;
+
+  // Conditional branch is a special case.
+  if (MI->isBranch() && !MI->isBarrier())
+    return true;
+  if (!MI->isPredicable())
+    return true;
+  return !isPredicated(MI);
+}
+
+
+bool TargetInstrInfo::PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const {
+  bool MadeChange = false;
+
+  assert(!MI->isBundle() &&
+         "TargetInstrInfo::PredicateInstruction() can't handle bundles");
+
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MI->isPredicable())
+    return false;
+
+  for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    if (MCID.OpInfo[i].isPredicate()) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg()) {
+        MO.setReg(Pred[j].getReg());
+        MadeChange = true;
+      } else if (MO.isImm()) {
+        MO.setImm(Pred[j].getImm());
+        MadeChange = true;
+      } else if (MO.isMBB()) {
+        MO.setMBB(Pred[j].getMBB());
+        MadeChange = true;
+      }
+      ++j;
+    }
+  }
+  return MadeChange;
+}
+
+bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+                                           const MachineMemOperand *&MMO,
+                                           int &FrameIndex) const {
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+         oe = MI->memoperands_end();
+       o != oe;
+       ++o) {
+    if ((*o)->isLoad() && (*o)->getValue())
+      if (const FixedStackPseudoSourceValue *Value =
+          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+        FrameIndex = Value->getFrameIndex();
+        MMO = *o;
+        return true;
+      }
+  }
+  return false;
+}
+
+bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+                                          const MachineMemOperand *&MMO,
+                                          int &FrameIndex) const {
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+         oe = MI->memoperands_end();
+       o != oe;
+       ++o) {
+    if ((*o)->isStore() && (*o)->getValue())
+      if (const FixedStackPseudoSourceValue *Value =
+          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+        FrameIndex = Value->getFrameIndex();
+        MMO = *o;
+        return true;
+      }
+  }
+  return false;
+}
+
+void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I,
+                                    unsigned DestReg,
+                                    unsigned SubIdx,
+                                    const MachineInstr *Orig,
+                                    const TargetRegisterInfo &TRI) const {
+  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+  MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+  MBB.insert(I, MI);
+}
+
+bool
+TargetInstrInfo::produceSameValue(const MachineInstr *MI0,
+                                  const MachineInstr *MI1,
+                                  const MachineRegisterInfo *MRI) const {
+  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+MachineInstr *TargetInstrInfo::duplicate(MachineInstr *Orig,
+                                         MachineFunction &MF) const {
+  assert(!Orig->isNotDuplicable() &&
+         "Instruction cannot be duplicated");
+  return MF.CloneMachineInstr(Orig);
+}
+
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+                                              unsigned FoldIdx) {
+  assert(MI->isCopy() && "MI must be a COPY instruction");
+  if (MI->getNumOperands() != 2)
+    return 0;
+  assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
+
+  const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+  const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+  if (FoldOp.getSubReg() || LiveOp.getSubReg())
+    return 0;
+
+  unsigned FoldReg = FoldOp.getReg();
+  unsigned LiveReg = LiveOp.getReg();
+
+  assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+         "Cannot fold physregs");
+
+  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+  if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+    return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+  if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
+    return RC;
+
+  // FIXME: Allow folding when register classes are memory compatible.
+  return 0;
+}
+
+bool TargetInstrInfo::
+canFoldMemoryOperand(const MachineInstr *MI,
+                     const SmallVectorImpl<unsigned> &Ops) const {
+  return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+                                   const SmallVectorImpl<unsigned> &Ops,
+                                   int FI) const {
+  unsigned Flags = 0;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    if (MI->getOperand(Ops[i]).isDef())
+      Flags |= MachineMemOperand::MOStore;
+    else
+      Flags |= MachineMemOperand::MOLoad;
+
+  MachineBasicBlock *MBB = MI->getParent();
+  assert(MBB && "foldMemoryOperand needs an inserted instruction");
+  MachineFunction &MF = *MBB->getParent();
+
+  // Ask the target to do the actual folding.
+  if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+    // Add a memory operand, foldMemoryOperandImpl doesn't do that.
+    assert((!(Flags & MachineMemOperand::MOStore) ||
+            NewMI->mayStore()) &&
+           "Folded a def to a non-store!");
+    assert((!(Flags & MachineMemOperand::MOLoad) ||
+            NewMI->mayLoad()) &&
+           "Folded a use to a non-load!");
+    const MachineFrameInfo &MFI = *MF.getFrameInfo();
+    assert(MFI.getObjectOffset(FI) != -1);
+    MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                              Flags, MFI.getObjectSize(FI),
+                              MFI.getObjectAlignment(FI));
+    NewMI->addMemOperand(MF, MMO);
+
+    // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
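+    // Until then, insert NewMI immediately before the original instruction
+    // and hand the resulting iterator back; the caller is responsible for
+    // erasing the old MI, per the contract documented above.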
+ return MBB->insert(MI, NewMI); + } + + // Straight COPY may fold as load/store. + if (!MI->isCopy() || Ops.size() != 1) + return 0; + + const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); + if (!RC) + return 0; + + const MachineOperand &MO = MI->getOperand(1-Ops[0]); + MachineBasicBlock::iterator Pos = MI; + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + + if (Flags == MachineMemOperand::MOStore) + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); + else + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); + return --Pos; +} + +/// foldMemoryOperand - Same as the previous version except it allows folding +/// of any load and store from / to any address, not just from a specific +/// stack slot. +MachineInstr* +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); +#ifndef NDEBUG + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); +#endif + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + + // Ask the target to do the actual folding. + MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + if (!NewMI) return 0; + + NewMI = MBB.insert(MI, NewMI); + + // Copy the memoperands from the load to the folded instruction. + NewMI->setMemRefs(LoadMI->memoperands_begin(), + LoadMI->memoperands_end()); + + return NewMI; +} + +bool TargetInstrInfo:: +isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, + AliasAnalysis *AA) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetMachine &TM = MF.getTarget(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + + // Remat clients assume operand 0 is the defined register. + if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) + return false; + unsigned DefReg = MI->getOperand(0).getReg(); + + // A sub-register definition can only be rematerialized if the instruction + // doesn't read the other parts of the register. Otherwise it is really a + // read-modify-write operation on the full virtual register which cannot be + // moved safely. + if (TargetRegisterInfo::isVirtualRegister(DefReg) && + MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg)) + return false; + + // A load from a fixed stack slot can be rematerialized. This may be + // redundant with subsequent checks, but it's target-independent, + // simple, and a common case. + int FrameIdx = 0; + if (TII.isLoadFromStackSlot(MI, FrameIdx) && + MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) + return true; + + // Avoid instructions obviously unsafe for remat. + if (MI->isNotDuplicable() || MI->mayStore() || + MI->hasUnmodeledSideEffects()) + return false; + + // Don't remat inline asm. We have no idea how expensive it is + // even if it's side effect free. + if (MI->isInlineAsm()) + return false; + + // Avoid instructions which load from potentially varying memory. + if (MI->mayLoad() && !MI->isInvariantLoad(AA)) + return false; + + // If any of the registers accessed are non-constant, conservatively assume + // the instruction is not rematerializable. 
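+  // A sketch of the intent (hypothetical operands, not from the original
+  // source): a constant-pool load whose only register use is an ambient,
+  // never-defined base register can be rematerialized, while something like
+  // "%vreg1 = ADDri %vreg2, 4" is rejected below because of the
+  // virtual-register use of %vreg2.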
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+
+    // Check for a well-behaved physical register.
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      if (MO.isUse()) {
+        // If the physreg has no defs anywhere, it's just an ambient register
+        // and we can freely move its uses. Alternatively, if it's allocatable,
+        // it could get allocated to something with a def during allocation.
+        if (!MRI.isConstantPhysReg(Reg, MF))
+          return false;
+      } else {
+        // A physreg def. We can't remat it.
+        return false;
+      }
+      continue;
+    }
+
+    // Only allow one virtual-register def. There may be multiple defs of the
+    // same virtual register, though.
+    if (MO.isDef() && Reg != DefReg)
+      return false;
+
+    // Don't allow any virtual-register uses. Rematting an instruction with
+    // virtual register uses would lengthen the live ranges of the uses, which
+    // is not necessarily a good idea, certainly not "trivial".
+    if (MO.isUse())
+      return false;
+  }
+
+  // Everything checked out.
+  return true;
+}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+                                           const MachineBasicBlock *MBB,
+                                           const MachineFunction &MF) const {
+  // Terminators and labels can't be scheduled around.
+  if (MI->isTerminator() || MI->isLabel())
+    return true;
+
+  // Don't attempt to schedule around any instruction that defines
+  // a stack-oriented pointer, as it's unlikely to be profitable. This
+  // saves compile time, because it doesn't require every single
+  // stack slot reference to depend on the instruction that does the
+  // modification.
+  const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+  if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI))
+    return true;
+
+  return false;
+}
+
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfo::usePreRAHazardRecognizer() const {
+  return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+                             const ScheduleDAG *DAG) const {
+  // Dummy hazard recognizer allows all instructions to issue.
+  return new ScheduleHazardRecognizer();
+}
+
+// Default implementation of CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+                               const ScheduleDAG *DAG) const {
+  return (ScheduleHazardRecognizer *)
+    new ScoreboardHazardRecognizer(II, DAG, "misched");
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+                                   const ScheduleDAG *DAG) const {
+  return (ScheduleHazardRecognizer *)
+    new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG latency interface.
+//===----------------------------------------------------------------------===//
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                                   SDNode *DefNode, unsigned DefIdx,
+                                   SDNode *UseNode, unsigned UseIdx) const {
+  if (!ItinData || ItinData->isEmpty())
+    return -1;
+
+  if (!DefNode->isMachineOpcode())
+    return -1;
+
+  unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+  if (!UseNode->isMachineOpcode())
+    return ItinData->getOperandCycle(DefClass, DefIdx);
+  unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                     SDNode *N) const {
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  if (!N->isMachineOpcode())
+    return 1;
+
+  return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr latency interface.
+//===----------------------------------------------------------------------===//
+
+unsigned
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+                                const MachineInstr *MI) const {
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  unsigned Class = MI->getDesc().getSchedClass();
+  int UOps = ItinData->Itineraries[Class].NumMicroOps;
+  if (UOps >= 0)
+    return UOps;
+
+  // The # of u-ops is dynamically determined. The specific target should
+  // override this function to return the right number.
+  return 1;
+}
+
+/// Return the default expected latency for a def based on its opcode.
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
+                                            const MachineInstr *DefMI) const {
+  if (DefMI->isTransient())
+    return 0;
+  if (DefMI->mayLoad())
+    return SchedModel->LoadLatency;
+  if (isHighLatencyDef(DefMI->getOpcode()))
+    return SchedModel->HighLatency;
+  return 1;
+}
+
+unsigned TargetInstrInfo::
+getInstrLatency(const InstrItineraryData *ItinData,
+                const MachineInstr *MI,
+                unsigned *PredCost) const {
+  // Default to one cycle for no itinerary. However, an "empty" itinerary may
+  // still have a MinLatency property, which getStageLatency checks.
+  if (!ItinData)
+    return MI->mayLoad() ? 2 : 1;
+
+  return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+                                       const MachineInstr *DefMI,
+                                       unsigned DefIdx) const {
+  if (!ItinData || ItinData->isEmpty())
+    return false;
+
+  unsigned DefClass = DefMI->getDesc().getSchedClass();
+  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+  return (DefCycle != -1 && DefCycle <= 1);
+}
+
+/// Both DefMI and UseMI must be valid. By default, call directly to the
+/// itinerary. This may be overridden by the target.
+int TargetInstrInfo::
+getOperandLatency(const InstrItineraryData *ItinData,
+                  const MachineInstr *DefMI, unsigned DefIdx,
+                  const MachineInstr *UseMI, unsigned UseIdx) const {
+  unsigned DefClass = DefMI->getDesc().getSchedClass();
+  unsigned UseClass = UseMI->getDesc().getSchedClass();
+  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+/// If we can determine the operand latency from the def only, without
+/// itinerary lookup, do so. Otherwise return -1.
+int TargetInstrInfo::computeDefOperandLatency(
+  const InstrItineraryData *ItinData,
+  const MachineInstr *DefMI, bool FindMin) const {
+
+  // Let the target hook getInstrLatency handle missing itineraries.
+  if (!ItinData)
+    return getInstrLatency(ItinData, DefMI);
+
+  // Return a latency based on the itinerary properties and defining
+  // instruction if possible. Some common subtargets don't require per-operand
+  // latency, especially for minimum latencies.
+  if (FindMin) {
+    // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
+    // it exists before defaulting to MinLatency.
+    if (ItinData->SchedModel->MinLatency >= 0)
+      return getInstrLatency(ItinData, DefMI);
+
+    // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
+    // For empty itineraries, short-circuit the check and default to one cycle.
+    if (ItinData->isEmpty())
+      return 1;
+  }
+  else if (ItinData->isEmpty())
+    return defaultDefLatency(ItinData->SchedModel, DefMI);
+
+  // ...operand lookup required
+  return -1;
+}
+
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use when the operand indices are already known. UseMI may
+/// be NULL for an unknown use.
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
+///
+/// Depending on the subtarget's itinerary properties, this may or may not need
+/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or
+/// UseIdx to compute min latency.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+                      const MachineInstr *DefMI, unsigned DefIdx,
+                      const MachineInstr *UseMI, unsigned UseIdx,
+                      bool FindMin) const {
+
+  int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin);
+  if (DefLatency >= 0)
+    return DefLatency;
+
+  assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+  int OperLatency = 0;
+  if (UseMI)
+    OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+  else {
+    unsigned DefClass = DefMI->getDesc().getSchedClass();
+    OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+  }
+  if (OperLatency >= 0)
+    return OperLatency;
+
+  // No operand latency was found.
+  unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+  // Expected latency is the max of the stage latency and itinerary props.
+  if (!FindMin)
+    InstrLatency = std::max(InstrLatency,
+                            defaultDefLatency(ItinData->SchedModel, DefMI));
+  return InstrLatency;
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
new file mode 100644
index 0000000..f42bdbd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -0,0 +1,1305 @@
+//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLoweringBase class.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cctype> +using namespace llvm; + +/// InitLibcallNames - Set default libcall names. +/// +static void InitLibcallNames(const char **Names, const TargetMachine &TM) { + Names[RTLIB::SHL_I16] = "__ashlhi3"; + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SHL_I128] = "__ashlti3"; + Names[RTLIB::SRL_I16] = "__lshrhi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRL_I128] = "__lshrti3"; + Names[RTLIB::SRA_I16] = "__ashrhi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I8] = "__mulqi3"; + Names[RTLIB::MUL_I16] = "__mulhi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::MULO_I32] = "__mulosi4"; + Names[RTLIB::MULO_I64] = "__mulodi4"; + Names[RTLIB::MULO_I128] = "__muloti4"; + Names[RTLIB::SDIV_I8] = "__divqi3"; + Names[RTLIB::SDIV_I16] = "__divhi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I8] = "__udivqi3"; + Names[RTLIB::UDIV_I16] = "__udivhi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I8] = "__modqi3"; + Names[RTLIB::SREM_I16] = "__modhi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I8] = "__umodqi3"; + Names[RTLIB::UREM_I16] = "__umodhi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::UREM_I128] = "__umodti3"; + + // These are generally not available. 
+ Names[RTLIB::SDIVREM_I8] = 0; + Names[RTLIB::SDIVREM_I16] = 0; + Names[RTLIB::SDIVREM_I32] = 0; + Names[RTLIB::SDIVREM_I64] = 0; + Names[RTLIB::SDIVREM_I128] = 0; + Names[RTLIB::UDIVREM_I8] = 0; + Names[RTLIB::UDIVREM_I16] = 0; + Names[RTLIB::UDIVREM_I32] = 0; + Names[RTLIB::UDIVREM_I64] = 0; + Names[RTLIB::UDIVREM_I128] = 0; + + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_F128] = "__addtf3"; + Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_F128] = "__subtf3"; + Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_F128] = "__multf3"; + Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_F128] = "__divtf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_F128] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::FMA_F32] = "fmaf"; + Names[RTLIB::FMA_F64] = "fma"; + Names[RTLIB::FMA_F80] = "fmal"; + Names[RTLIB::FMA_F128] = "fmal"; + Names[RTLIB::FMA_PPCF128] = "fmal"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_F128] = "__powitf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_F128] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_F128] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_F128] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_F128] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_F128] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_F128] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_F128] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_F128] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_F128] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_F128] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; 
+ Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_F128] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_F128] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_F128] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::COPYSIGN_F32] = "copysignf"; + Names[RTLIB::COPYSIGN_F64] = "copysign"; + Names[RTLIB::COPYSIGN_F80] = "copysignl"; + Names[RTLIB::COPYSIGN_F128] = "copysignl"; + Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; + Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2"; + Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; + Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; + Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; + Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; + Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; + Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = 
"__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::OEQ_F128] = "__eqtf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::UNE_F128] = "__netf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OGE_F128] = "__getf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLT_F128] = "__lttf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OLE_F128] = "__letf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::OGT_F128] = "__gttf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::UO_F128] = "__unordtf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::O_F128] = "__unordtf2"; + Names[RTLIB::MEMCPY] = "memcpy"; + Names[RTLIB::MEMMOVE] = "memmove"; + Names[RTLIB::MEMSET] = "memset"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; + Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; + Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; + Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; + Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + Names[RTLIB::SYNC_FETCH_AND_SUB_1] = 
"__sync_fetch_and_sub_1"; + Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; + Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; + Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; + Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; + Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; + Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; + Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; + Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; + Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; + Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; + Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; + Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; + Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; + Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; + Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; + Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; + + if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) { + Names[RTLIB::SINCOS_F32] = "sincosf"; + Names[RTLIB::SINCOS_F64] = "sincos"; + Names[RTLIB::SINCOS_F80] = "sincosl"; + Names[RTLIB::SINCOS_F128] = "sincosl"; + Names[RTLIB::SINCOS_PPCF128] = "sincosl"; + } else { + // These are generally not available. + Names[RTLIB::SINCOS_F32] = 0; + Names[RTLIB::SINCOS_F64] = 0; + Names[RTLIB::SINCOS_F80] = 0; + Names[RTLIB::SINCOS_F128] = 0; + Names[RTLIB::SINCOS_PPCF128] = 0; + } +} + +/// InitLibcallCallingConvs - Set default libcall CallingConvs. +/// +static void InitLibcallCallingConvs(CallingConv::ID *CCs) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + CCs[i] = CallingConv::C; + } +} + +/// getFPEXT - Return the FPEXT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::f64) + return FPEXT_F32_F64; + if (RetVT == MVT::f128) + return FPEXT_F32_F128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::f128) + return FPEXT_F64_F128; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPROUND - Return the FPROUND_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { + if (RetVT == MVT::f32) { + if (OpVT == MVT::f64) + return FPROUND_F64_F32; + if (OpVT == MVT::f80) + return FPROUND_F80_F32; + if (OpVT == MVT::f128) + return FPROUND_F128_F32; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F32; + } else if (RetVT == MVT::f64) { + if (OpVT == MVT::f80) + return FPROUND_F80_F64; + if (OpVT == MVT::f128) + return FPROUND_F128_F64; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F64; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOSINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOSINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOSINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F128_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOUINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOUINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOUINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F128_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. +/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::OEQ_F128] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::UNE_F128] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OGE_F128] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLT_F128] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OLE_F128] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::OGT_F128] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::UO_F128] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; + CCs[RTLIB::O_F128] = ISD::SETEQ; +} + +/// NOTE: The constructor takes ownership of TLOF. +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, + const TargetLoweringObjectFile *tlof) + : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { + // All operations default to being supported. 
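+  // A zeroed action table encodes "Legal" (action value 0); a target's own
+  // TargetLowering subclass then overrides individual entries, e.g. a
+  // hypothetical target without a hardware divider might call
+  //   setOperationAction(ISD::SDIV, MVT::i32, Expand);
+  // in its constructor.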
+ memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + + // Set default actions for various operations. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use isFPImmLegal() + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + setOperationAction(ISD::ConstantFP, MVT::f128, Expand); + + // These library functions default to expand. + setOperationAction(ISD::FLOG , MVT::f16, Expand); + setOperationAction(ISD::FLOG2, MVT::f16, Expand); + setOperationAction(ISD::FLOG10, MVT::f16, Expand); + setOperationAction(ISD::FEXP , MVT::f16, Expand); + setOperationAction(ISD::FEXP2, MVT::f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); + setOperationAction(ISD::FCEIL, MVT::f16, Expand); + setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::f16, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f128, Expand); + setOperationAction(ISD::FLOG2, MVT::f128, Expand); + setOperationAction(ISD::FLOG10, MVT::f128, Expand); + setOperationAction(ISD::FEXP , MVT::f128, Expand); + setOperationAction(ISD::FEXP2, MVT::f128, Expand); + setOperationAction(ISD::FFLOOR, MVT::f128, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); + setOperationAction(ISD::FCEIL, MVT::f128, Expand); + 
setOperationAction(ISD::FRINT, MVT::f128, Expand); + setOperationAction(ISD::FTRUNC, MVT::f128, Expand); + + // Default ISD::TRAP to expand (which turns it into abort). + setOperationAction(ISD::TRAP, MVT::Other, Expand); + + // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" + // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. + // + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); + + IsLittleEndian = TD->isLittleEndian(); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize + = MaxStoresPerMemmoveOptSize = 4; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + JumpIsExpensive = false; + PredictableSelectIsExpensive = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + BooleanVectorContents = UndefinedBooleanContent; + SchedPreferenceInfo = Sched::ILP; + JumpBufSize = 0; + JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; + PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + ShouldFoldAtomicFences = false; + InsertFencesForAtomic = false; + SupportJumpTables = true; + MinimumJumpTableEntries = 4; + + InitLibcallNames(LibcallRoutineNames, TM); + InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); +} + +TargetLoweringBase::~TargetLoweringBase() { + delete &TLOF; +} + +MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { + return MVT::getIntegerVT(8*TD->getPointerSize(0)); +} + +EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + return getScalarShiftAmountTy(LHSTy); +} + +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. +bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + + +static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT, + TargetLoweringBase *TLI) { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. 
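+  // For example, assuming v4f32 is the widest legal vector type: breaking
+  // down v8f32 halves NumElts from 8 to 4 and doubles NumVectorRegs from 1
+  // to 2, so the value travels in two v4f32 intermediates.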
+  while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+    NumElts >>= 1;
+    NumVectorRegs <<= 1;
+  }
+
+  NumIntermediates = NumVectorRegs;
+
+  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+  if (!TLI->isTypeLegal(NewVT))
+    NewVT = EltTy;
+  IntermediateVT = NewVT;
+
+  unsigned NewVTSize = NewVT.getSizeInBits();
+
+  // Convert sizes such as i33 to i64.
+  if (!isPowerOf2_32(NewVTSize))
+    NewVTSize = NextPowerOf2(NewVTSize);
+
+  MVT DestVT = TLI->getRegisterType(NewVT);
+  RegisterVT = DestVT;
+  if (EVT(DestVT).bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
+    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+  // Otherwise, promotion or legal types use the same number of registers as
+  // the vector decimated to the appropriate level.
+  return NumVectorRegs;
+}
+
+/// isLegalRC - Return true if at least one of the value types that can be
+/// represented by the specified register class is legal.
+bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
+  for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+       I != E; ++I) {
+    if (isTypeLegal(*I))
+      return true;
+  }
+  return false;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLoweringBase::findRepresentativeClass(MVT VT) const {
+  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
+  if (!RC)
+    return std::make_pair(RC, 0);
+
+  // Compute the set of all super-register classes.
+  BitVector SuperRegRC(TRI->getNumRegClasses());
+  for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
+    SuperRegRC.setBitsInMask(RCI.getMask());
+
+  // Find the first legal register class with the largest spill size.
+  const TargetRegisterClass *BestRC = RC;
+  for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+    const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
+    // We want the largest possible spill size.
+    if (SuperRC->getSize() <= BestRC->getSize())
+      continue;
+    if (!isLegalRC(SuperRC))
+      continue;
+    BestRC = SuperRC;
+  }
+  return std::make_pair(BestRC, 1);
+}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLoweringBase::computeRegisterProperties() {
+  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+         "Too many value types for ValueTypeActions to hold!");
+
+  // Everything defaults to needing one register.
+  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+    NumRegistersForVT[i] = 1;
+    RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+  }
+  // ...except isVoid, which doesn't need any registers.
+  NumRegistersForVT[MVT::isVoid] = 0;
+
+  // Find the largest integer register class.
+  unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+  // Every integer value type larger than this largest register takes twice as
+  // many registers to represent as the previous ValueType.
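+  // For example, if i32 were the largest legal integer type (hypothetically),
+  // i64 would be expanded to 2 registers and i128 to 4, each step
+  // transforming to the next smaller integer type.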
+ for (unsigned ExpandedReg = LargestIntReg + 1;
+ ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+ TypeExpandInteger);
+ }
+
+ // Inspect all of the ValueTypes smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ MVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (const MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ }
+
+ // Decide how to handle f128. If the target does not have native f128
+ // support, expand it to i128 and we will generate soft float library calls.
+ if (!isTypeLegal(MVT::f128)) {
+ NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::f128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64
+ // support, expand it to i64 and we will generate soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (isTypeLegal(VT)) continue;
+
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
+ bool IsLegalWiderType = false;
+ // First try to promote the elements of integer vectors. If no legal
+ // promotion was found, fall back to the widen-vector method.
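+ // E.g. (hypothetical legality): v4i8 would promote element-wise to a
+ // legal v4i32 in the first loop, while v2i32 with no wider-element match
+ // would be widened to a legal v4i32 by the second loop.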
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
+ && SVT.getVectorNumElements() == NElts &&
+ isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+
+ if (IsLegalWiderType) continue;
+
+ // Try to widen the vector.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
+ MVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ unsigned NumElts = VT.getVectorNumElements();
+ ValueTypeActions.setTypeAction(VT,
+ NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ }
+
+ // Determine the 'representative' register class for each value type.
+ // A representative register class is the largest (meaning one which is
+ // not a sub-register class / subreg register class) legal register class
+ // for a group of value types. For example, on i386 the representative
+ // class for i8, i16, and i32 would be GR32; on x86_64 it would be GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
+}
+
+EVT TargetLoweringBase::getSetCCResultType(EVT VT) const {
+ assert(!VT.isVector() && "No default SetCC type for vectors!");
+ return getPointerTy(0).SimpleTy;
+}
+
+MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+/// +unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const { + unsigned NumElts = VT.getVectorNumElements(); + + // If there is a wider vector type with the same element type as this one, + // or a promoted vector type that has the same number of elements which + // are wider, then we should convert to that legal vector type. + // This handles things like <2 x float> -> <4 x float> and + // <4 x i1> -> <4 x i32>. + LegalizeTypeAction TA = getTypeAction(Context, VT); + if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { + EVT RegisterEVT = getTypeToTransformTo(Context, VT); + if (isTypeLegal(RegisterEVT)) { + IntermediateVT = RegisterEVT; + RegisterVT = RegisterEVT.getSimpleVT(); + NumIntermediates = 1; + return 1; + } + } + + // Figure out the right, legal destination reg to copy into. + EVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !isTypeLegal( + EVT::getVectorVT(Context, EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); + if (!isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + MVT DestVT = getRegisterType(Context, NewVT); + RegisterVT = DestVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. +void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, + SmallVectorImpl<ISD::OutputArg> &Outs, + const TargetLowering &TLI) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. 
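+ // E.g., an i8 return value marked signext is widened below to the legal
+ // register type for i32 (typically i32 itself) before being split into
+ // parts (a hypothetical illustration).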
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+
+ // 'inreg' on a function refers to the return value.
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate the extension type, if any.
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ Flags.setSExt();
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetTransformInfo Helpers
+//===----------------------------------------------------------------------===//
+
+int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/IR/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+ case SDiv: return ISD::SDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad: return 0;
+ }
+
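+ // Opcodes with no ISD equivalent (branches, PHIs, calls, allocas, ...)
+ // intentionally map to 0 above; falling out of the switch means the
+ // opcode was not generated from Instruction.def at all.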
+ llvm_unreachable("Unknown instruction type encountered!");
+}
+
+std::pair<unsigned, MVT>
+TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
+ LLVMContext &C = Ty->getContext();
+ EVT MTy = getValueType(Ty);
+
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ LegalizeKind LK = getTypeConversion(C, MTy);
+
+ if (LK.first == TypeLegal)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ MTy = LK.second;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // The default implementation supports a conservative RISC-style r+r and
+ // r+i addressing mode.
+
+ // Allow a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r.
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
new file mode 100644
index 0000000..3bdca4c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -0,0 +1,784 @@
+//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; +using namespace dwarf; + +//===----------------------------------------------------------------------===// +// ELF +//===----------------------------------------------------------------------===// + +MCSymbol * +TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, + Mangler *Mang, + MachineModuleInfo *MMI) const { + unsigned Encoding = getPersonalityEncoding(); + switch (Encoding & 0x70) { + default: + report_fatal_error("We do not support this DWARF encoding yet!"); + case dwarf::DW_EH_PE_absptr: + return Mang->getSymbol(GV); + case dwarf::DW_EH_PE_pcrel: { + return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + + Mang->getSymbol(GV)->getName()); + } + } +} + +void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, + const TargetMachine &TM, + const MCSymbol *Sym) const { + SmallString<64> NameData("DW.ref."); + NameData += Sym->getName(); + MCSymbol *Label = getContext().GetOrCreateSymbol(NameData); + Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); + Streamer.EmitSymbolAttribute(Label, MCSA_Weak); + StringRef Prefix = ".data."; + NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end()); + unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; + const MCSection *Sec = getContext().getELFSection(NameData, + ELF::SHT_PROGBITS, + Flags, + SectionKind::getDataRel(), + 0, Label->getName()); + unsigned Size = TM.getDataLayout()->getPointerSize(); + Streamer.SwitchSection(Sec); + Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); + Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); + const MCExpr *E = MCConstantExpr::Create(Size, getContext()); + Streamer.EmitELFSize(Label, E); + Streamer.EmitLabel(Label); + + Streamer.EmitSymbolValue(Sym, Size); +} + +const MCExpr *TargetLoweringObjectFileELF:: +getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding, + MCStreamer &Streamer) const { + + if (Encoding & dwarf::DW_EH_PE_indirect) { + MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += ".DW.stub"; + + // Add information about the stub reference to ELFMMI so that the stub + // gets emitted by the asmprinter. 
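+ // E.g., for a type-info global _ZTI1A this creates (assuming the usual
+ // C++ mangling) an indirect slot named _ZTI1A.DW.stub that the
+ // asmprinter initializes with the address of _ZTI1A.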
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+static SectionKind
+getELFKindForNamedSection(StringRef Name, SectionKind K) {
+ // N.B.: The defaults used in here are not the same ones used in MC.
+ // We follow gcc, MC follows gas. For example, given ".section .eh_frame",
+ // both gas and MC will produce a section with no flags. Given
+ // section(".eh_frame") gcc will produce:
+ //
+ // .section .eh_frame,"a",@progbits
+ if (Name.empty() || Name[0] != '.') return K;
+
+ // Some lame default implementation based on some magic section names.
+ if (Name == ".bss" ||
+ Name.startswith(".bss.") ||
+ Name.startswith(".gnu.linkonce.b.") ||
+ Name.startswith(".llvm.linkonce.b.") ||
+ Name == ".sbss" ||
+ Name.startswith(".sbss.") ||
+ Name.startswith(".gnu.linkonce.sb.") ||
+ Name.startswith(".llvm.linkonce.sb."))
+ return SectionKind::getBSS();
+
+ if (Name == ".tdata" ||
+ Name.startswith(".tdata.") ||
+ Name.startswith(".gnu.linkonce.td.") ||
+ Name.startswith(".llvm.linkonce.td."))
+ return SectionKind::getThreadData();
+
+ if (Name == ".tbss" ||
+ Name.startswith(".tbss.") ||
+ Name.startswith(".gnu.linkonce.tb.") ||
+ Name.startswith(".llvm.linkonce.tb."))
+ return SectionKind::getThreadBSS();
+
+ return K;
+}
+
+
+static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+
+ if (Name == ".init_array")
+ return ELF::SHT_INIT_ARRAY;
+
+ if (Name == ".fini_array")
+ return ELF::SHT_FINI_ARRAY;
+
+ if (Name == ".preinit_array")
+ return ELF::SHT_PREINIT_ARRAY;
+
+ if (K.isBSS() || K.isThreadBSS())
+ return ELF::SHT_NOBITS;
+
+ return ELF::SHT_PROGBITS;
+}
+
+
+static unsigned
+getELFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (!K.isMetadata())
+ Flags |= ELF::SHF_ALLOC;
+
+ if (K.isText())
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (K.isWriteable())
+ Flags |= ELF::SHF_WRITE;
+
+ if (K.isThreadLocal())
+ Flags |= ELF::SHF_TLS;
+
+ // K.isMergeableConst() is left out to honour PR4650
+ if (K.isMergeableCString() || K.isMergeableConst4() ||
+ K.isMergeableConst8() || K.isMergeableConst16())
+ Flags |= ELF::SHF_MERGE;
+
+ if (K.isMergeableCString())
+ Flags |= ELF::SHF_STRINGS;
+
+ return Flags;
+}
+
+
+const MCSection *TargetLoweringObjectFileELF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ StringRef SectionName = GV->getSection();
+
+ // Infer section flags from the section name if we can.
+ Kind = getELFKindForNamedSection(SectionName, Kind);
+
+ return getContext().getELFSection(SectionName,
+ getELFSectionType(SectionName, Kind),
+ getELFSectionFlags(Kind), Kind);
+}
+
+/// getSectionPrefixForGlobal - Return the section prefix name used by options
+/// FunctionSections and DataSections.
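+/// For example, with -ffunction-sections a function foo is typically
+/// emitted into ".text.foo" and a writable global bar into ".data.bar"
+/// (hypothetical names, assuming the default mangler adds no prefix).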
+static const char *getSectionPrefixForGlobal(SectionKind Kind) { + if (Kind.isText()) return ".text."; + if (Kind.isReadOnly()) return ".rodata."; + if (Kind.isBSS()) return ".bss."; + + if (Kind.isThreadData()) return ".tdata."; + if (Kind.isThreadBSS()) return ".tbss."; + + if (Kind.isDataNoRel()) return ".data."; + if (Kind.isDataRelLocal()) return ".data.rel.local."; + if (Kind.isDataRel()) return ".data.rel."; + if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local."; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".data.rel.ro."; +} + + +const MCSection *TargetLoweringObjectFileELF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // If we have -ffunction-section or -fdata-section then we should emit the + // global value to a uniqued section specifically for it. + bool EmitUniquedSection; + if (Kind.isText()) + EmitUniquedSection = TM.getFunctionSections(); + else + EmitUniquedSection = TM.getDataSections(); + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. + if ((GV->isWeakForLinker() || EmitUniquedSection) && + !Kind.isCommon()) { + const char *Prefix; + Prefix = getSectionPrefixForGlobal(Kind); + + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + MCSymbol *Sym = Mang->getSymbol(GV); + Name.append(Sym->getName().begin(), Sym->getName().end()); + StringRef Group = ""; + unsigned Flags = getELFSectionFlags(Kind); + if (GV->isWeakForLinker()) { + Group = Sym->getName(); + Flags |= ELF::SHF_GROUP; + } + + return getContext().getELFSection(Name.str(), + getELFSectionType(Name.str(), Kind), + Flags, Kind, 0, Group); + } + + if (Kind.isText()) return TextSection; + + if (Kind.isMergeable1ByteCString() || + Kind.isMergeable2ByteCString() || + Kind.isMergeable4ByteCString()) { + + // We also need alignment here. + // FIXME: this is getting the alignment of the character, not the + // alignment of the global! + unsigned Align = + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)); + + const char *SizeSpec = ".rodata.str1."; + if (Kind.isMergeable2ByteCString()) + SizeSpec = ".rodata.str2."; + else if (Kind.isMergeable4ByteCString()) + SizeSpec = ".rodata.str4."; + else + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + + + std::string Name = SizeSpec + utostr(Align); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | + ELF::SHF_MERGE | + ELF::SHF_STRINGS, + Kind); + } + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + return ReadOnlySection; // .const + } + + if (Kind.isReadOnly()) return ReadOnlySection; + + if (Kind.isThreadData()) return TLSDataSection; + if (Kind.isThreadBSS()) return TLSBSSSection; + + // Note: we claim that common symbols are put in BSSSection, but they are + // really emitted with the magic .comm directive, which creates a symbol table + // entry but not a section. 
+ if (Kind.isBSS() || Kind.isCommon()) return BSSSection; + + if (Kind.isDataNoRel()) return DataSection; + if (Kind.isDataRelLocal()) return DataRelLocalSection; + if (Kind.isDataRel()) return DataRelSection; + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +/// getSectionForConstant - Given a mergeable constant with the +/// specified size and relocation information, return a section that it +/// should be placed in. +const MCSection *TargetLoweringObjectFileELF:: +getSectionForConstant(SectionKind Kind) const { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + if (Kind.isReadOnly()) + return ReadOnlySection; + + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +const MCSection * +TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (Priority == 65535) + return StaticCtorSection; + + if (UseInitArray) { + std::string Name = std::string(".init_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); + } else { + std::string Name = std::string(".ctors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + } +} + +const MCSection * +TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (Priority == 65535) + return StaticDtorSection; + + if (UseInitArray) { + std::string Name = std::string(".fini_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); + } else { + std::string Name = std::string(".dtors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + } +} + +void +TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { + UseInitArray = UseInitArray_; + if (!UseInitArray) + return; + + StaticCtorSection = + getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); +} + +//===----------------------------------------------------------------------===// +// MachO +//===----------------------------------------------------------------------===// + +/// emitModuleFlags - Perform code emission for module flags. 
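+/// For example (hypothetical IR), a module flag entry such as
+/// !{i32 1, !"Objective-C Image Info Version", i32 0} feeds VersionVal
+/// below, and "Objective-C Image Info Section" names the section that
+/// receives the L_OBJC_IMAGE_INFO data.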
+void TargetLoweringObjectFileMachO:: +emitModuleFlags(MCStreamer &Streamer, + ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + Mangler *Mang, const TargetMachine &TM) const { + unsigned VersionVal = 0; + unsigned ImageInfoFlags = 0; + MDNode *LinkerOptions = 0; + StringRef SectionVal; + + for (ArrayRef<Module::ModuleFlagEntry>::iterator + i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { + const Module::ModuleFlagEntry &MFE = *i; + + // Ignore flags with 'Require' behavior. + if (MFE.Behavior == Module::Require) + continue; + + StringRef Key = MFE.Key->getString(); + Value *Val = MFE.Val; + + if (Key == "Objective-C Image Info Version") { + VersionVal = cast<ConstantInt>(Val)->getZExtValue(); + } else if (Key == "Objective-C Garbage Collection" || + Key == "Objective-C GC Only" || + Key == "Objective-C Is Simulated") { + ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue(); + } else if (Key == "Objective-C Image Info Section") { + SectionVal = cast<MDString>(Val)->getString(); + } else if (Key == "Linker Options") { + LinkerOptions = cast<MDNode>(Val); + } + } + + // Emit the linker options if present. + if (LinkerOptions) { + for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { + MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); + SmallVector<std::string, 4> StrOptions; + + // Convert to strings. + for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { + MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); + StrOptions.push_back(MDOption->getString()); + } + + Streamer.EmitLinkerOptions(StrOptions); + } + } + + // The section is mandatory. If we don't have it, then we don't have GC info. + if (SectionVal.empty()) return; + + StringRef Segment, Section; + unsigned TAA = 0, StubSize = 0; + bool TAAParsed; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section, + TAA, TAAParsed, StubSize); + if (!ErrorCode.empty()) + // If invalid, report the error with report_fatal_error. + report_fatal_error("Invalid section specifier '" + Section + "': " + + ErrorCode + "."); + + // Get the section. + const MCSectionMachO *S = + getContext().getMachOSection(Segment, Section, TAA, StubSize, + SectionKind::getDataNoRel()); + Streamer.SwitchSection(S); + Streamer.EmitLabel(getContext(). + GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(VersionVal, 4); + Streamer.EmitIntValue(ImageInfoFlags, 4); + Streamer.AddBlankLine(); +} + +const MCSection *TargetLoweringObjectFileMachO:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // Parse the section specifier and create it if valid. + StringRef Segment, Section; + unsigned TAA = 0, StubSize = 0; + bool TAAParsed; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + TAA, TAAParsed, StubSize); + if (!ErrorCode.empty()) { + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getName() + + "' has an invalid section specifier '" + + GV->getSection() + "': " + ErrorCode + "."); + } + + // Get the section. + const MCSectionMachO *S = + getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); + + // If TAA wasn't set by ParseSectionSpecifier() above, + // use the value returned by getMachOSection() as a default. + if (!TAAParsed) + TAA = S->getTypeAndAttributes(); + + // Okay, now that we got the section, verify that the TAA & StubSize agree. 
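+ // E.g., declaring one global with section("__TEXT,__foo,regular") and
+ // another with section("__TEXT,__foo,coalesced") (hypothetical
+ // specifiers) would disagree on the section type and is rejected here.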
+ // If the user declared multiple globals with different section flags, we need + // to reject it here. + if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getName() + + "' section type or attributes does not match previous" + " section specifier"); + } + + return S; +} + +const MCSection *TargetLoweringObjectFileMachO:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + // Handle thread local data. + if (Kind.isThreadBSS()) return TLSBSSSection; + if (Kind.isThreadData()) return TLSDataSection; + + if (Kind.isText()) + return GV->isWeakForLinker() ? TextCoalSection : TextSection; + + // If this is weak/linkonce, put this in a coalescable section, either in text + // or data depending on if it is writable. + if (GV->isWeakForLinker()) { + if (Kind.isReadOnly()) + return ConstTextCoalSection; + return DataCoalSection; + } + + // FIXME: Alignment check should be handled by section classifier. + if (Kind.isMergeable1ByteCString() && + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + return CStringSection; + + // Do not put 16-bit arrays in the UString section if they have an + // externally visible label, this runs into issues with certain linker + // versions. + if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + return UStringSection; + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + } + + // Otherwise, if it is readonly, but not something we can specially optimize, + // just drop it in .const. + if (Kind.isReadOnly()) + return ReadOnlySection; + + // If this is marked const, put it into a const section. But if the dynamic + // linker needs to write to it, put it in the data segment. + if (Kind.isReadOnlyWithRel()) + return ConstDataSection; + + // Put zero initialized globals with strong external linkage in the + // DATA, __common section with the .zerofill directive. + if (Kind.isBSSExtern()) + return DataCommonSection; + + // Put zero initialized globals with local linkage in __DATA,__bss directive + // with the .zerofill directive (aka .lcomm). + if (Kind.isBSSLocal()) + return DataBSSSection; + + // Otherwise, just drop the variable in the normal data section. + return DataSection; +} + +const MCSection * +TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { + // If this constant requires a relocation, we have to put it in the data + // segment, not in the text segment. + if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) + return ConstDataSection; + + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + return ReadOnlySection; // .const +} + +/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide +/// not to emit the UsedDirective for some symbols in llvm.used. 
+// FIXME: REMOVE this (rdar://7071300) +bool TargetLoweringObjectFileMachO:: +shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { + /// On Darwin, internally linked data beginning with "L" or "l" does not have + /// the directive emitted (this occurs in ObjC metadata). + if (!GV) return false; + + // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. + if (GV->hasLocalLinkage() && !isa<Function>(GV)) { + // FIXME: ObjC metadata is currently emitted as internal symbols that have + // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and + // this horrible hack can go away. + MCSymbol *Sym = Mang->getSymbol(GV); + if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l') + return false; + } + + return true; +} + +const MCExpr *TargetLoweringObjectFileMachO:: +getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding, + MCStreamer &Streamer) const { + // The mach-o version of this method defaults to returning a stub reference. + + if (Encoding & DW_EH_PE_indirect) { + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. + MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); + MachineModuleInfoImpl::StubValueTy &StubSym = + GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : + MachOMMI.getGVStubEntry(SSym); + if (StubSym.getPointer() == 0) { + MCSymbol *Sym = Mang->getSymbol(GV); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return TargetLoweringObjectFile:: + getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()), + Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + } + + return TargetLoweringObjectFile:: + getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer); +} + +MCSymbol *TargetLoweringObjectFileMachO:: +getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const { + // The mach-o version of this method defaults to returning a stub reference. + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. 
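+ // E.g., a personality like ___gxx_personality_v0 gets an indirect slot
+ // named ___gxx_personality_v0$non_lazy_ptr (assuming the usual Darwin
+ // underscore prefix).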
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return SSym;
+}
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+static unsigned
+getCOFFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (K.isMetadata())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isText())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_CNT_CODE;
+ else if (K.isBSS())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isThreadLocal())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isReadOnly())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ;
+ else if (K.isWriteable())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ return Flags;
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::
+getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ int Selection = 0;
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+ SmallString<128> Name(GV->getSection().c_str());
+ if (GV->isWeakForLinker()) {
+ Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append("$");
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+ }
+ return getContext().getCOFFSection(Name,
+ Characteristics,
+ Selection,
+ Kind);
+}
+
+static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text$";
+ if (Kind.isBSS())
+ return ".bss$";
+ if (Kind.isThreadLocal()) {
+ // 'LLVM' is just an arbitrary string to ensure that the section name gets
+ // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
+ return ".tls$LLVM";
+ }
+ if (Kind.isWriteable())
+ return ".data$";
+ return ".rdata$";
+}
+
+
+const MCSection *TargetLoweringObjectFileCOFF::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // If this global is linkonce/weak and the target handles this by emitting it
+ // into a 'uniqued' section name, create and return the section now.
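+ // E.g., a hypothetical weak function foo would land in ".text$foo" with
+ // IMAGE_COMDAT_SELECT_ANY, letting the linker fold duplicate definitions.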
+ if (GV->isWeakForLinker()) { + const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + MCSymbol *Sym = Mang->getSymbol(GV); + Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + + unsigned Characteristics = getCOFFSectionFlags(Kind); + + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + + return getContext().getCOFFSection(Name.str(), Characteristics, + COFF::IMAGE_COMDAT_SELECT_ANY, Kind); + } + + if (Kind.isText()) + return getTextSection(); + + if (Kind.isThreadLocal()) + return getTLSDataSection(); + + return getDataSection(); +} + diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp new file mode 100644 index 0000000..0f59d01 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -0,0 +1,52 @@ +//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the methods in the TargetOptions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +/// DisableFramePointerElim - This returns true if frame pointer elimination +/// optimization should be disabled for the given machine function. +bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { + // Check to see if we should eliminate non-leaf frame pointers and then + // check to see if we should eliminate all frame pointers. + if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->hasCalls(); + } + + return NoFramePointerElim; +} + +/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option +/// is specified on the command line. When this flag is off(default), the +/// code generator is not allowed to generate mad (multiply add) if the +/// result is "less precise" than doing those operations individually. +bool TargetOptions::LessPreciseFPMAD() const { + return UnsafeFPMath || LessPreciseFPMADOption; +} + +/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume +/// that the rounding mode of the FPU can change from its default. +bool TargetOptions::HonorSignDependentRoundingFPMath() const { + return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; +} + +/// getTrapFunctionName - If this returns a non-empty string, this means isel +/// should lower Intrinsic::trap to a call to the specified function name +/// instead of an ISD::TRAP node. +StringRef TargetOptions::getTrapFunctionName() const { + return TrapFuncName; +} + diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp new file mode 100644 index 0000000..84b4bfc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -0,0 +1,285 @@ +//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
+ regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *SRINames,
+ const unsigned *SRILaneMasks)
+ : InfoDesc(ID), SubRegIndexNames(SRINames),
+ SubRegIndexLaneMasks(SRILaneMasks),
+ RegClassBegin(RCB), RegClassEnd(RCE) {
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+void PrintReg::print(raw_ostream &OS) const {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
+ else
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+}
+
+void PrintRegUnit::print(raw_ostream &OS) const {
+ // Generic printout when TRI is missing.
+ if (!TRI) {
+ OS << "Unit~" << Unit;
+ return;
+ }
+
+ // Check for invalid register units.
+ if (Unit >= TRI->getNumRegUnits()) {
+ OS << "BadUnit~" << Unit;
+ return;
+ }
+
+ // Normal units have at least one root.
+ MCRegUnitRootIterator Roots(Unit, TRI);
+ assert(Roots.isValid() && "Unit has no roots.");
+ OS << TRI->getName(*Roots);
+ for (++Roots; Roots.isValid(); ++Roots)
+ OS << '~' << TRI->getName(*Roots);
+}
+
+/// getAllocatableClass - Return the maximal subclass of the given register
+/// class that is allocatable, or NULL.
+const TargetRegisterClass *
+TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
+ if (!RC || RC->isAllocatable())
+ return RC;
+
+ const unsigned *SubClass = RC->getSubClassMask();
+ for (unsigned Base = 0, BaseE = getNumRegClasses();
+ Base < BaseE; Base += 32) {
+ unsigned Idx = Base;
+ for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
+ unsigned Offset = CountTrailingZeros_32(Mask);
+ const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
+ if (SubRC->isAllocatable())
+ return SubRC;
+ Mask >>= Offset;
+ Idx += Offset + 1;
+ }
+ }
+ return NULL;
+}
+
+/// getMinimalPhysRegClass - Returns the register class of a physical
+/// register of the given type, picking the most specific (smallest) register
+/// class of the right type that contains this physreg.
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+ // Pick the most specific register class of the right type that contains
+ // this physreg.
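+ // E.g., on an x86-like target, querying AL would return GR8 rather than
+ // one of the larger classes that also contain it (hypothetical example).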
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Toggle the bits that represent allocatable
+/// registers for the specific register class.
+static void getAllocatableSetForRC(const MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ assert(RC->isAllocatable() && "invalid for nonallocatable sets");
+ ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(getNumRegs());
+ if (RC) {
+ // A register class with no allocatable subclass returns an empty set.
+ const TargetRegisterClass *SubClass = getAllocatableClass(RC);
+ if (SubClass)
+ getAllocatableSetForRC(MF, SubClass, Allocatable);
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ if ((*I)->isAllocatable())
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ }
+
+ // Mask out the reserved registers.
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable &= Reserved.flip();
+
+ return Allocatable;
+}
+
+static inline
+const TargetRegisterClass *firstCommonClass(const uint32_t *A,
+ const uint32_t *B,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
+ if (unsigned Common = *A++ & *B++)
+ return TRI->getRegClass(I + CountTrailingZeros_32(Common));
+ return 0;
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) const {
+ // First take care of the trivial cases.
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return 0;
+
+ // Register classes are ordered topologically, so the largest common
+ // sub-class is the common sub-class with the smallest ID.
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ assert(A && B && "Missing register class");
+ assert(Idx && "Bad sub-register index");
+
+ // Find Idx in the list of super-register indices.
+ for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI)
+ if (RCI.getSubReg() == Idx)
+ // The bit mask contains all register classes that are projected into B
+ // by Idx. Find a class that is also a sub-class of A.
+ return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
+ return 0;
+}
+
+const TargetRegisterClass *TargetRegisterInfo::
+getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
+ const TargetRegisterClass *RCB, unsigned SubB,
+ unsigned &PreA, unsigned &PreB) const {
+ assert(RCA && SubA && RCB && SubB && "Invalid arguments");
+
+ // Search all pairs of sub-register indices that project into RCA and RCB
+ // respectively. This is quadratic, but usually the sets are very small. On
+ // most targets like X86, there will only be a single sub-register index
+ // (e.g., sub_16bit projecting into GR16).
+ //
+ // The worst case is a register class like DPR on ARM.
+ // We have indices dsub_0..dsub_7 projecting into that class.
+ //
+ // It is very common that one register class is a sub-register of the other.
+ // Arrange for RCA to be the larger register so the answer will be found in
+ // the first iteration. This makes the search linear for the most common
+ // case.
+ const TargetRegisterClass *BestRC = 0;
+ unsigned *BestPreA = &PreA;
+ unsigned *BestPreB = &PreB;
+ if (RCA->getSize() < RCB->getSize()) {
+ std::swap(RCA, RCB);
+ std::swap(SubA, SubB);
+ std::swap(BestPreA, BestPreB);
+ }
+
+ // Also terminate the search once we have found a register class as small as
+ // RCA.
+ unsigned MinSize = RCA->getSize();
+
+ for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
+ unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
+ for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) {
+ // Check if a common super-register class exists for this index pair.
+ const TargetRegisterClass *RC =
+ firstCommonClass(IA.getMask(), IB.getMask(), this);
+ if (!RC || RC->getSize() < MinSize)
+ continue;
+
+ // The indexes must compose identically: PreA+SubA == PreB+SubB.
+ unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB);
+ if (FinalA != FinalB)
+ continue;
+
+ // Is RC a better candidate than BestRC?
+ if (BestRC && RC->getSize() >= BestRC->getSize())
+ continue;
+
+ // Yes, RC is the smallest super-register class seen so far.
+ BestRC = RC;
+ *BestPreA = IA.getSubReg();
+ *BestPreB = IB.getSubReg();
+
+ // Bail early if we reached MinSize. We won't find a better candidate.
+ if (BestRC->getSize() == MinSize)
+ return BestRC;
+ }
+ }
+ return BestRC;
+}
+
+// Compute target-independent register allocator hints to help eliminate
+// copies.
+void
+TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ // Hints with HintType != 0 were set by target-dependent code.
+ // Such targets must provide their own implementation of
+ // TRI::getRegAllocationHints to interpret those hint types.
+ assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
+
+ // Target-independent hints are either a physical or a virtual register.
+ unsigned Phys = Hint.second;
+ if (VRM && isVirtualRegister(Phys))
+ Phys = VRM->getPhys(Phys);
+
+ // Check that Phys is a valid hint in VirtReg's register class.
+ if (!isPhysicalRegister(Phys))
+ return;
+ if (MRI.isReserved(Phys))
+ return;
+ // Check that Phys is in the allocation order. We shouldn't heed hints
+ // from VirtReg's register class if they aren't in the allocation order. The
+ // target probably has a reason for removing the register.
+ if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
+ return;
+
+ // All clear, tell the register allocator to prefer this register.
+ Hints.push_back(Phys);
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
new file mode 100644
index 0000000..783bfa1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -0,0 +1,309 @@
+//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements a wrapper around MCSchedModel that allows the interface +// to benefit from information currently only available in TargetInstrInfo. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), + cl::desc("Use TargetSchedModel for latency lookup")); + +static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true), + cl::desc("Use InstrItineraryData for latency lookup")); + +bool TargetSchedModel::hasInstrSchedModel() const { + return EnableSchedModel && SchedModel.hasInstrSchedModel(); +} + +bool TargetSchedModel::hasInstrItineraries() const { + return EnableSchedItins && !InstrItins.isEmpty(); +} + +static unsigned gcd(unsigned Dividend, unsigned Divisor) { + // Dividend and Divisor will be naturally swapped as needed. + while(Divisor) { + unsigned Rem = Dividend % Divisor; + Dividend = Divisor; + Divisor = Rem; + }; + return Dividend; +} +static unsigned lcm(unsigned A, unsigned B) { + unsigned LCM = (uint64_t(A) * B) / gcd(A, B); + assert((LCM >= A && LCM >= B) && "LCM overflow"); + return LCM; +} + +void TargetSchedModel::init(const MCSchedModel &sm, + const TargetSubtargetInfo *sti, + const TargetInstrInfo *tii) { + SchedModel = sm; + STI = sti; + TII = tii; + STI->initInstrItins(InstrItins); + + unsigned NumRes = SchedModel.getNumProcResourceKinds(); + ResourceFactors.resize(NumRes); + ResourceLCM = SchedModel.IssueWidth; + for (unsigned Idx = 0; Idx < NumRes; ++Idx) { + unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; + if (NumUnits > 0) + ResourceLCM = lcm(ResourceLCM, NumUnits); + } + MicroOpFactor = ResourceLCM / SchedModel.IssueWidth; + for (unsigned Idx = 0; Idx < NumRes; ++Idx) { + unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; + ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0; + } +} + +unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, + const MCSchedClassDesc *SC) const { + if (hasInstrItineraries()) { + int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); + return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI); + } + if (hasInstrSchedModel()) { + if (!SC) + SC = resolveSchedClass(MI); + if (SC->isValid()) + return SC->NumMicroOps; + } + return MI->isTransient() ? 0 : 1; +} + +// The machine model may explicitly specify an invalid latency, which +// effectively means infinite latency. Since users of the TargetSchedule API +// don't know how to handle this, we convert it to a very large latency that is +// easy to distinguish when debugging the DAG but won't induce overflow. +static unsigned convertLatency(int Cycles) { + return Cycles >= 0 ? Cycles : 1000; +} + +/// If we can determine the operand latency from the def only, without machine +/// model or itinerary lookup, do so. Otherwise return -1. +int TargetSchedModel::getDefLatency(const MachineInstr *DefMI, + bool FindMin) const { + + // Return a latency based on the itinerary properties and defining instruction + // if possible. 
Some common subtargets don't require per-operand latency, + // especially for minimum latencies. + if (FindMin) { + // If MinLatency is invalid, then use the itinerary for MinLatency. If no + // itinerary exists either, then use single cycle latency. + if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) { + return 1; + } + return SchedModel.MinLatency; + } + else if (!hasInstrSchedModel() && !hasInstrItineraries()) { + return TII->defaultDefLatency(&SchedModel, DefMI); + } + // ...operand lookup required + return -1; +} + +/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require +/// evaluation of predicates that depend on instruction operands or flags. +const MCSchedClassDesc *TargetSchedModel:: +resolveSchedClass(const MachineInstr *MI) const { + + // Get the definition's scheduling class descriptor from this machine model. + unsigned SchedClass = MI->getDesc().getSchedClass(); + const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); + +#ifndef NDEBUG + unsigned NIter = 0; +#endif + while (SCDesc->isVariant()) { + assert(++NIter < 6 && "Variants are nested deeper than the magic number"); + + SchedClass = STI->resolveSchedClass(SchedClass, MI, this); + SCDesc = SchedModel.getSchedClassDesc(SchedClass); + } + return SCDesc; +} + +/// Find the def index of this operand. This index maps to the machine model and +/// is independent of use operands. Def operands may be reordered with uses or +/// merged with uses without affecting the def index (e.g. before/after +/// regalloc). However, an instruction's def operands must never be reordered +/// with respect to each other. +static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) { + unsigned DefIdx = 0; + for (unsigned i = 0; i != DefOperIdx; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) + ++DefIdx; + } + return DefIdx; +} + +/// Find the use index of this operand. This is independent of the instruction's +/// def operands. +/// +/// Note that uses are not determined by the operand's isUse property, which +/// is simply the inverse of isDef. Here we consider any readsReg operand to be +/// a "use". The machine model allows an operand to be both a Def and Use. +static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { + unsigned UseIdx = 0; + for (unsigned i = 0; i != UseOperIdx; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.readsReg()) + ++UseIdx; + } + return UseIdx; +} + +// Top-level API for clients that know the operand indices. +unsigned TargetSchedModel::computeOperandLatency( + const MachineInstr *DefMI, unsigned DefOperIdx, + const MachineInstr *UseMI, unsigned UseOperIdx, + bool FindMin) const { + + int DefLatency = getDefLatency(DefMI, FindMin); + if (DefLatency >= 0) + return DefLatency; + + if (hasInstrItineraries()) { + int OperLatency = 0; + if (UseMI) { + OperLatency = + TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx); + } + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx); + } + if (OperLatency >= 0) + return OperLatency; + + // No operand latency was found. + unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI); + + // Expected latency is the max of the stage latency and itinerary props. + // Rather than directly querying InstrItins stage latency, we call a TII + // hook to allow subtargets to specialize latency. 
This hook is only + // applicable to the InstrItins model. InstrSchedModel should model all + // special cases without TII hooks. + if (!FindMin) + InstrLatency = std::max(InstrLatency, + TII->defaultDefLatency(&SchedModel, DefMI)); + return InstrLatency; + } + assert(!FindMin && hasInstrSchedModel() && + "Expected a SchedModel for this cpu"); + const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); + unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); + if (DefIdx < SCDesc->NumWriteLatencyEntries) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + unsigned WriteID = WLEntry->WriteResourceID; + unsigned Latency = convertLatency(WLEntry->Cycles); + if (!UseMI) + return Latency; + + // Lookup the use's latency adjustment in SubtargetInfo. + const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI); + if (UseDesc->NumReadAdvanceEntries == 0) + return Latency; + unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); + return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + } + // If DefIdx does not exist in the model (e.g. implicit defs), then return + // unit latency (defaultDefLatency may be too conservative). +#ifndef NDEBUG + if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() + && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) { + std::string Err; + raw_string_ostream ss(Err); + ss << "DefIdx " << DefIdx << " exceeds machine model writes for " + << *DefMI; + report_fatal_error(ss.str()); + } +#endif + // FIXME: Automatically giving all implicit defs defaultDefLatency is + // undesirable. We should only do it for defs that are known to the MC + // desc like flags. Truly implicit defs should get 1 cycle latency. + return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); +} + +unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { + // For the itinerary model, fall back to the old subtarget hook. + // Allow subtargets to compute Bundle latencies outside the machine model. + if (hasInstrItineraries() || MI->isBundle()) + return TII->getInstrLatency(&InstrItins, MI); + + if (hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); + if (SCDesc->isValid()) { + unsigned Latency = 0; + for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + Latency = std::max(Latency, convertLatency(WLEntry->Cycles)); + } + return Latency; + } + } + return TII->defaultDefLatency(&SchedModel, MI); +} + +unsigned TargetSchedModel:: +computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, + const MachineInstr *DepMI) const { + // MinLatency == -1 is for in-order processors that always have unit + // MinLatency. MinLatency > 0 is for in-order processors with varying min + // latencies, but since this is not a RAW dep, we always use unit latency. + if (SchedModel.MinLatency != 0) + return 1; + + // MinLatency == 0 indicates an out-of-order processor that can dispatch + // WAW dependencies in the same cycle. + + // Treat predication as a data dependency for out-of-order cpus. In-order + // cpus do not need to treat predicated writes specially. 
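+  //
+  // Hypothetical example (register and opcode names are illustrative):
+  //   %r0<def> = SUBri %r3, 1            <- DefMI
+  //   %r0<def> = ADDri %r1, 2, pred:%p0  <- DepMI, predicated def of %r0
+  // The predicated ADD may leave %r0 unchanged, so it effectively reads the
+  // SUB's result; the code below therefore charges the full DefMI latency
+  // even though readsRegister() reports no use.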
+  //
+  // TODO: The following hack exists because predication passes do not
+  // correctly append imp-use operands, and readsReg() strangely returns false
+  // for predicated defs.
+  unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
+  const MachineFunction &MF = *DefMI->getParent()->getParent();
+  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+  if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
+    return computeInstrLatency(DefMI);
+
+  // If we have a per-operand scheduling model, check if this def is writing
+  // an unbuffered resource. If so, it is treated like an in-order cpu.
+  if (hasInstrSchedModel()) {
+    const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+    if (SCDesc->isValid()) {
+      for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
+             *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
+        if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
+          return 1;
+      }
+    }
+  }
+  return 0;
+} diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp new file mode 100644 index 0000000..e6dfe10 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -0,0 +1,1678 @@ +//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+//     A = B op C
+//
+// to:
+//
+//     A = B
+//     A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has
+// to be capable of handling the non-SSA nature of these rewritten
+// virtual registers.
+//
+// It is also worth noting that the duplicate operand of the two-address
+// instruction is removed.
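+//
+// For example (opcode and register numbers here are illustrative), a
+// tied-operand instruction such as:
+//
+//     %reg1026 = ADD32rr %reg1024, %reg1025
+//
+// is untied by inserting a copy:
+//
+//     %reg1026 = COPY %reg1024
+//     %reg1026 = ADD32rr %reg1026, %reg1025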
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "twoaddrinstr" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); +STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); +STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); +STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); +STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); +STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); +STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); + +namespace { +class TwoAddressInstructionPass : public MachineFunctionPass { + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; + MachineRegisterInfo *MRI; + LiveVariables *LV; + LiveIntervals *LIS; + AliasAnalysis *AA; + CodeGenOpt::Level OptLevel; + + // The current basic block being processed. + MachineBasicBlock *MBB; + + // DistanceMap - Keep track the distance of a MI from the start of the + // current basic block. + DenseMap<MachineInstr*, unsigned> DistanceMap; + + // Set of already processed instructions in the current block. + SmallPtrSet<MachineInstr*, 8> Processed; + + // SrcRegMap - A map from virtual registers to physical registers which are + // likely targets to be coalesced to due to copies from physical registers to + // virtual registers. e.g. v1024 = move r0. + DenseMap<unsigned, unsigned> SrcRegMap; + + // DstRegMap - A map from virtual registers to physical registers which are + // likely targets to be coalesced to due to copies to physical registers from + // virtual registers. e.g. r1 = move v1024. 
+ DenseMap<unsigned, unsigned> DstRegMap; + + bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, + MachineBasicBlock::iterator OldPos); + + bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef); + + bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + MachineInstr *MI, unsigned Dist); + + bool commuteInstruction(MachineBasicBlock::iterator &mi, + unsigned RegB, unsigned RegC, unsigned Dist); + + bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); + + bool convertInstTo3Addr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned RegA, unsigned RegB, unsigned Dist); + + bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI); + + bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + + bool tryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist, bool shouldOnlyCommute); + + void scanUses(unsigned DstReg); + + void processCopy(MachineInstr *MI); + + typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; + typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + void eliminateRegSequence(MachineBasicBlock::iterator&); + +public: + static char ID; // Pass identification, replacement for typeid + TwoAddressInstructionPass() : MachineFunctionPass(ID) { + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - Pass entry point. + bool runOnMachineFunction(MachineFunction&); +}; +} // end anonymous namespace + +char TwoAddressInstructionPass::ID = 0; +INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false) + +char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; + +static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); + +/// sink3AddrInstruction - A two-address instruction has been converted to a +/// three-address instruction to avoid clobbering a register. Try to sink it +/// past the instruction that would kill the above mentioned register to reduce +/// register pressure. +bool TwoAddressInstructionPass:: +sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, + MachineBasicBlock::iterator OldPos) { + // FIXME: Shouldn't we be trying to do this before we three-addressify the + // instruction? After this transformation is done, we no longer need + // the instruction to be in three-address form. + + // Check if it's safe to move this instruction. + bool SeenStore = true; // Be conservative. 
+ if (!MI->isSafeToMove(TII, AA, SeenStore)) + return false; + + unsigned DefReg = 0; + SmallSet<unsigned, 4> UseRegs; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isUse() && MOReg != SavedReg) + UseRegs.insert(MO.getReg()); + if (!MO.isDef()) + continue; + if (MO.isImplicit()) + // Don't try to move it if it implicitly defines a register. + return false; + if (DefReg) + // For now, don't move any instructions that define multiple registers. + return false; + DefReg = MO.getReg(); + } + + // Find the instruction that kills SavedReg. + MachineInstr *KillMI = NULL; + if (LIS) { + LiveInterval &LI = LIS->getInterval(SavedReg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } + if (!KillMI) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SavedReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineOperand &UseMO = UI.getOperand(); + if (!UseMO.isKill()) + continue; + KillMI = UseMO.getParent(); + break; + } + } + + // If we find the instruction that kills SavedReg, and it is in an + // appropriate location, we can try to sink the current instruction + // past it. + if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || + KillMI == OldPos || KillMI->isTerminator()) + return false; + + // If any of the definitions are used by another instruction between the + // position and the kill use, then it's not safe to sink it. + // + // FIXME: This can be sped up if there is an easy way to query whether an + // instruction is before or after another instruction. Then we can use + // MachineRegisterInfo def / use instead. + MachineOperand *KillMO = NULL; + MachineBasicBlock::iterator KillPos = KillMI; + ++KillPos; + + unsigned NumVisited = 0; + for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (DefReg == MOReg) + return false; + + if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) { + if (OtherMI == KillMI && MOReg == SavedReg) + // Save the operand that kills the register. We want to unset the kill + // marker if we can sink MI past it. + KillMO = &MO; + else if (UseRegs.count(MOReg)) + // One of the uses is killed before the destination. + return false; + } + } + } + assert(KillMO && "Didn't find kill"); + + if (!LIS) { + // Update kill and LV information. + KillMO->setIsKill(false); + KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); + KillMO->setIsKill(true); + + if (LV) + LV->replaceKillInstruction(SavedReg, KillMI, MI); + } + + // Move instruction to its destination. 
+ MBB->remove(MI); + MBB->insert(KillPos, MI); + + if (LIS) + LIS->handleMove(MI); + + ++Num3AddrSunk; + return true; +} + +/// noUseAfterLastDef - Return true if there are no intervening uses between the +/// last instruction in the MBB that defines the specified register and the +/// two-address instruction which is being processed. It also returns the last +/// def location by reference +bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, + unsigned &LastDef) { + LastDef = 0; + unsigned LastUse = Dist; + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), + E = MRI->reg_end(); I != E; ++I) { + MachineOperand &MO = I.getOperand(); + MachineInstr *MI = MO.getParent(); + if (MI->getParent() != MBB || MI->isDebugValue()) + continue; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + continue; + if (MO.isUse() && DI->second < LastUse) + LastUse = DI->second; + if (MO.isDef() && DI->second > LastDef) + LastDef = DI->second; + } + + return !(LastUse > LastDef && LastUse < Dist); +} + +/// isCopyToReg - Return true if the specified MI is a copy instruction or +/// a extract_subreg instruction. It also returns the source and destination +/// registers and whether they are physical registers by reference. +static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, + unsigned &SrcReg, unsigned &DstReg, + bool &IsSrcPhys, bool &IsDstPhys) { + SrcReg = 0; + DstReg = 0; + if (MI.isCopy()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + } else if (MI.isInsertSubreg() || MI.isSubregToReg()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + } else + return false; + + IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); + IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + return true; +} + +/// isPLainlyKilled - Test if the given register value, which is used by the +// given instruction, is killed by the given instruction. +static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, + LiveIntervals *LIS) { + if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && + !LIS->isNotInMIMap(MI)) { + // FIXME: Sometimes tryInstructionTransform() will add instructions and + // test whether they can be folded before keeping them. In this case it + // sets a kill before recursively calling tryInstructionTransform() again. + // If there is no interval available, we assume that this instruction is + // one of those. A kill flag is manually inserted on the operand so the + // check below will handle it. + LiveInterval &LI = LIS->getInterval(Reg); + // This is to match the kill flag version where undefs don't have kill + // flags. + if (!LI.hasAtLeastOneValue()) + return false; + + SlotIndex useIdx = LIS->getInstructionIndex(MI); + LiveInterval::const_iterator I = LI.find(useIdx); + assert(I != LI.end() && "Reg must be live-in to use."); + return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx); + } + + return MI->killsRegister(Reg); +} + +/// isKilled - Test if the given register value, which is used by the given +/// instruction, is killed by the given instruction. This looks through +/// coalescable copies to see if the original value is potentially not killed. 
+/// +/// For example, in this code: +/// +/// %reg1034 = copy %reg1024 +/// %reg1035 = copy %reg1025<kill> +/// %reg1036 = add %reg1034<kill>, %reg1035<kill> +/// +/// %reg1034 is not considered to be killed, since it is copied from a +/// register which is not killed. Treating it as not killed lets the +/// normal heuristics commute the (two-address) add, which lets +/// coalescing eliminate the extra copy. +/// +/// If allowFalsePositives is true then likely kills are treated as kills even +/// if it can't be proven that they are kills. +static bool isKilled(MachineInstr &MI, unsigned Reg, + const MachineRegisterInfo *MRI, + const TargetInstrInfo *TII, + LiveIntervals *LIS, + bool allowFalsePositives) { + MachineInstr *DefMI = &MI; + for (;;) { + // All uses of physical registers are likely to be kills. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && + (allowFalsePositives || MRI->hasOneUse(Reg))) + return true; + if (!isPlainlyKilled(DefMI, Reg, LIS)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return true; + MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); + // If there are multiple defs, we can't do a simple analysis, so just + // go with what the kill flag says. + if (llvm::next(Begin) != MRI->def_end()) + return true; + DefMI = &*Begin; + bool IsSrcPhys, IsDstPhys; + unsigned SrcReg, DstReg; + // If the def is something other than a copy, then it isn't going to + // be coalesced, so follow the kill flag. + if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) + return true; + Reg = SrcReg; + } +} + +/// isTwoAddrUse - Return true if the specified MI uses the specified register +/// as a two-address use. If so, return the destination register by reference. +static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned NumOps = MI.isInlineAsm() + ? MI.getNumOperands() : MCID.getNumOperands(); + for (unsigned i = 0; i != NumOps; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + unsigned ti; + if (MI.isRegTiedToDefOperand(i, &ti)) { + DstReg = MI.getOperand(ti).getReg(); + return true; + } + } + return false; +} + +/// findOnlyInterestingUse - Given a register, if has a single in-basic block +/// use, return the use instruction if it's a copy or a two-address use. +static +MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII, + bool &IsCopy, + unsigned &DstReg, bool &IsDstPhys) { + if (!MRI->hasOneNonDBGUse(Reg)) + // None or more than one use. + return 0; + MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg); + if (UseMI.getParent() != MBB) + return 0; + unsigned SrcReg; + bool IsSrcPhys; + if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) { + IsCopy = true; + return &UseMI; + } + IsDstPhys = false; + if (isTwoAddrUse(UseMI, Reg, DstReg)) { + IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + return &UseMI; + } + return 0; +} + +/// getMappedReg - Return the physical register the specified virtual register +/// might be mapped to. 
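+///
+/// For example (illustrative), with RegMap = {v1025 -> v1024, v1024 -> r0},
+/// getMappedReg(v1025, RegMap) follows v1025 -> v1024 -> r0 and returns r0.
+/// If the chain ends at an unmapped virtual register, 0 is returned.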
+static unsigned +getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) { + while (TargetRegisterInfo::isVirtualRegister(Reg)) { + DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg); + if (SI == RegMap.end()) + return 0; + Reg = SI->second; + } + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return Reg; + return 0; +} + +/// regsAreCompatible - Return true if the two registers are equal or aliased. +/// +static bool +regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { + if (RegA == RegB) + return true; + if (!RegA || !RegB) + return false; + return TRI->regsOverlap(RegA, RegB); +} + + +/// isProfitableToCommute - Return true if it's potentially profitable to commute +/// the two-address instruction that's being processed. +bool +TwoAddressInstructionPass:: +isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + MachineInstr *MI, unsigned Dist) { + if (OptLevel == CodeGenOpt::None) + return false; + + // Determine if it's profitable to commute this two address instruction. In + // general, we want no uses between this instruction and the definition of + // the two-address register. + // e.g. + // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1 + // %reg1029<def> = MOV8rr %reg1028 + // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead> + // insert => %reg1030<def> = MOV8rr %reg1028 + // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead> + // In this case, it might not be possible to coalesce the second MOV8rr + // instruction if the first one is coalesced. So it would be profitable to + // commute it: + // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1 + // %reg1029<def> = MOV8rr %reg1028 + // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead> + // insert => %reg1030<def> = MOV8rr %reg1029 + // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead> + + if (!isPlainlyKilled(MI, regC, LIS)) + return false; + + // Ok, we have something like: + // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead> + // let's see if it's worth commuting it. + + // Look for situations like this: + // %reg1024<def> = MOV r1 + // %reg1025<def> = MOV r0 + // %reg1026<def> = ADD %reg1024, %reg1025 + // r0 = MOV %reg1026 + // Commute the ADD to hopefully eliminate an otherwise unavoidable copy. + unsigned ToRegA = getMappedReg(regA, DstRegMap); + if (ToRegA) { + unsigned FromRegB = getMappedReg(regB, SrcRegMap); + unsigned FromRegC = getMappedReg(regC, SrcRegMap); + bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI); + bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI); + if (BComp != CComp) + return !BComp && CComp; + } + + // If there is a use of regC between its last def (could be livein) and this + // instruction, then bail. + unsigned LastDefC = 0; + if (!noUseAfterLastDef(regC, Dist, LastDefC)) + return false; + + // If there is a use of regB between its last def (could be livein) and this + // instruction, then go ahead and make this transformation. + unsigned LastDefB = 0; + if (!noUseAfterLastDef(regB, Dist, LastDefB)) + return true; + + // Since there are no intervening uses for both registers, then commute + // if the def of regC is closer. Its live interval is shorter. + return LastDefB && LastDefC && LastDefC > LastDefB; +} + +/// commuteInstruction - Commute a two-address instruction and update the basic +/// block, distance map, and live variables if needed. Return true if it is +/// successful. 
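+///
+/// For example (illustrative operands):
+///   %reg1030 = ADD8rr %reg1028<kill>, %reg1029
+/// becomes
+///   %reg1030 = ADD8rr %reg1029, %reg1028<kill>
+/// so that %reg1030 can instead coalesce with the source feeding %reg1029.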
+bool TwoAddressInstructionPass::
+commuteInstruction(MachineBasicBlock::iterator &mi,
+                   unsigned RegB, unsigned RegC, unsigned Dist) {
+  MachineInstr *MI = mi;
+  DEBUG(dbgs() << "2addr: COMMUTING  : " << *MI);
+  MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+  if (NewMI == 0) {
+    DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+    return false;
+  }
+
+  DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+  assert(NewMI == MI &&
+         "TargetInstrInfo::commuteInstruction() should not return a new "
+         "instruction unless it was requested.");
+
+  // Update source register map.
+  unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+  if (FromRegC) {
+    unsigned RegA = MI->getOperand(0).getReg();
+    SrcRegMap[RegA] = FromRegC;
+  }
+
+  return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,
+                                                   unsigned RegB) {
+  // Look for situations like this:
+  // %reg1024<def> = MOV r1
+  // %reg1025<def> = MOV r0
+  // %reg1026<def> = ADD %reg1024, %reg1025
+  // r2            = MOV %reg1026
+  // Turn ADD into a 3-address instruction to avoid a copy.
+  unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+  if (!FromRegB)
+    return false;
+  unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+  return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
+}
+
+/// convertInstTo3Addr - Convert the specified two-address instruction into a
+/// three-address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+                                              MachineBasicBlock::iterator &nmi,
+                                              unsigned RegA, unsigned RegB,
+                                              unsigned Dist) {
+  // FIXME: Why does convertToThreeAddress() need an iterator reference?
+  MachineFunction::iterator MFI = MBB;
+  MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV);
+  assert(MBB == MFI && "convertToThreeAddress changed iterator reference");
+  if (!NewMI)
+    return false;
+
+  DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+  DEBUG(dbgs() << "2addr:         TO 3-ADDR: " << *NewMI);
+  bool Sunk = false;
+
+  if (LIS)
+    LIS->ReplaceMachineInstrInMaps(mi, NewMI);
+
+  if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+    // FIXME: Temporary workaround. If the new instruction doesn't
+    // use RegB, convertToThreeAddress must have created more
+    // than one instruction.
+    Sunk = sink3AddrInstruction(NewMI, RegB, mi);
+
+  MBB->erase(mi); // Nuke the old inst.
+
+  if (!Sunk) {
+    DistanceMap.insert(std::make_pair(NewMI, Dist));
+    mi = NewMI;
+    nmi = llvm::next(mi);
+  }
+
+  // Update source and destination register maps.
+  SrcRegMap.erase(RegA);
+  DstRegMap.erase(RegB);
+  return true;
+}
+
+/// scanUses - Scan forward recursively, looking only at uses; update the maps
+/// if the use is a copy or a two-address instruction.
+void
+TwoAddressInstructionPass::scanUses(unsigned DstReg) {
+  SmallVector<unsigned, 4> VirtRegPairs;
+  bool IsDstPhys;
+  bool IsCopy = false;
+  unsigned NewReg = 0;
+  unsigned Reg = DstReg;
+  while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,
+                                                      IsCopy, NewReg,
+                                                      IsDstPhys)) {
+    if (IsCopy && !Processed.insert(UseMI))
+      break;
+
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+    if (DI != DistanceMap.end())
+      // Earlier in the same MBB. Reached via a back edge.
+ break; + + if (IsDstPhys) { + VirtRegPairs.push_back(NewReg); + break; + } + bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second; + if (!isNew) + assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!"); + VirtRegPairs.push_back(NewReg); + Reg = NewReg; + } + + if (!VirtRegPairs.empty()) { + unsigned ToReg = VirtRegPairs.back(); + VirtRegPairs.pop_back(); + while (!VirtRegPairs.empty()) { + unsigned FromReg = VirtRegPairs.back(); + VirtRegPairs.pop_back(); + bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second; + if (!isNew) + assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!"); + ToReg = FromReg; + } + bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second; + if (!isNew) + assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!"); + } +} + +/// processCopy - If the specified instruction is not yet processed, process it +/// if it's a copy. For a copy instruction, we find the physical registers the +/// source and destination registers might be mapped to. These are kept in +/// point-to maps used to determine future optimizations. e.g. +/// v1024 = mov r0 +/// v1025 = mov r1 +/// v1026 = add v1024, v1025 +/// r1 = mov r1026 +/// If 'add' is a two-address instruction, v1024, v1026 are both potentially +/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is +/// potentially joined with r1 on the output side. It's worthwhile to commute +/// 'add' to eliminate a copy. +void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { + if (Processed.count(MI)) + return; + + bool IsSrcPhys, IsDstPhys; + unsigned SrcReg, DstReg; + if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) + return; + + if (IsDstPhys && !IsSrcPhys) + DstRegMap.insert(std::make_pair(SrcReg, DstReg)); + else if (!IsDstPhys && IsSrcPhys) { + bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second; + if (!isNew) + assert(SrcRegMap[DstReg] == SrcReg && + "Can't map to two src physical registers!"); + + scanUses(DstReg); + } + + Processed.insert(MI); + return; +} + +/// rescheduleMIBelowKill - If there is one more local instruction that reads +/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill +/// instruction in order to eliminate the need for the copy. +bool TwoAddressInstructionPass:: +rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + // Bail immediately if we don't have LV or LIS available. We use them to find + // kills efficiently. + if (!LV && !LIS) + return false; + + MachineInstr *MI = &*mi; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = 0; + if (LIS) { + LiveInterval &LI = LIS->getInterval(Reg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } else { + KillMI = LV->getVarInfo(Reg).findKill(MBB); + } + if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. 
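+    // (A kill that is itself a copy is better left to the register
+    // coalescer; sinking MI around it would not remove any instruction.)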
+ return false; + + if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() || + KillMI->isBranch() || KillMI->isTerminator()) + // Don't move pass calls, etc. + return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore = true; + if (!MI->isSafeToMove(TII, AA, SeenStore)) + return false; + + if (TII->getInstrLatency(InstrItins, MI) > 1) + // FIXME: Needs more sophisticated heuristics. + return false; + + SmallSet<unsigned, 2> Uses; + SmallSet<unsigned, 2> Kills; + SmallSet<unsigned, 2> Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) + Defs.insert(MOReg); + else { + Uses.insert(MOReg); + if (MOReg != Reg && (MO.isKill() || + (LIS && isPlainlyKilled(MI, MOReg, LIS)))) + Kills.insert(MOReg); + } + } + + // Move the copies connected to MI down as well. + MachineBasicBlock::iterator Begin = MI; + MachineBasicBlock::iterator AfterMI = llvm::next(Begin); + + MachineBasicBlock::iterator End = AfterMI; + while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) { + Defs.insert(End->getOperand(0).getReg()); + ++End; + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + ++KillPos; + for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) + // Don't move pass calls, etc. + return false; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) { + if (Uses.count(MOReg)) + // Physical register use would be clobbered. + return false; + if (!MO.isDead() && Defs.count(MOReg)) + // May clobber a physical register def. + // FIXME: This may be too conservative. It's ok if the instruction + // is sunken completely below the use. + return false; + } else { + if (Defs.count(MOReg)) + return false; + bool isKill = MO.isKill() || + (LIS && isPlainlyKilled(OtherMI, MOReg, LIS)); + if (MOReg != Reg && + ((isKill && Uses.count(MOReg)) || Kills.count(MOReg))) + // Don't want to extend other live ranges and update kills. + return false; + if (MOReg == Reg && !isKill) + // We can't schedule across a use of the register in question. + return false; + // Ensure that if this is register in question, its the kill we expect. + assert((MOReg != Reg || OtherMI == KillMI) && + "Found multiple kills of a register in a basic block"); + } + } + } + + // Move debug info as well. + while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue()) + --Begin; + + nmi = End; + MachineBasicBlock::iterator InsertPos = KillPos; + if (LIS) { + // We have to move the copies first so that the MBB is still well-formed + // when calling handleMove(). 
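+    // handleMove() expects the instruction to already sit at its final
+    // position in a consistent block, so each copy is spliced into place
+    // and reindexed one at a time.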
+ for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) { + MachineInstr *CopyMI = MBBI; + ++MBBI; + MBB->splice(InsertPos, MBB, CopyMI); + LIS->handleMove(CopyMI); + InsertPos = CopyMI; + } + End = llvm::next(MachineBasicBlock::iterator(MI)); + } + + // Copies following MI may have been moved as well. + MBB->splice(InsertPos, MBB, Begin, End); + DistanceMap.erase(DI); + + // Update live variables + if (LIS) { + LIS->handleMove(MI); + } else { + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } + + DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); + return true; +} + +/// isDefTooClose - Return true if the re-scheduling will put the given +/// instruction too close to the defs of its register dependencies. +bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), + DE = MRI->def_end(); DI != DE; ++DI) { + MachineInstr *DefMI = &*DI; + if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike()) + continue; + if (DefMI == MI) + return true; // MI is defining something KillMI uses + DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI); + if (DDI == DistanceMap.end()) + return true; // Below MI + unsigned DefDist = DDI->second; + assert(Dist > DefDist && "Visited def already?"); + if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist)) + return true; + } + return false; +} + +/// rescheduleKillAboveMI - If there is one more local instruction that reads +/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the +/// current two-address instruction in order to eliminate the need for the +/// copy. +bool TwoAddressInstructionPass:: +rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + // Bail immediately if we don't have LV or LIS available. We use them to find + // kills efficiently. + if (!LV && !LIS) + return false; + + MachineInstr *MI = &*mi; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = 0; + if (LIS) { + LiveInterval &LI = LIS->getInterval(Reg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } else { + KillMI = LV->getVarInfo(Reg).findKill(MBB); + } + if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. 
+ return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore = true; + if (!KillMI->isSafeToMove(TII, AA, SeenStore)) + return false; + + SmallSet<unsigned, 2> Uses; + SmallSet<unsigned, 2> Kills; + SmallSet<unsigned, 2> Defs; + SmallSet<unsigned, 2> LiveDefs; + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + if (!MOReg) + continue; + if (isDefTooClose(MOReg, DI->second, MI)) + return false; + bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS)); + if (MOReg == Reg && !isKill) + return false; + Uses.insert(MOReg); + if (isKill && MOReg != Reg) + Kills.insert(MOReg); + } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + Defs.insert(MOReg); + if (!MO.isDead()) + LiveDefs.insert(MOReg); + } + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) + // Don't move pass calls, etc. + return false; + SmallVector<unsigned, 2> OtherDefs; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isUse()) { + if (Defs.count(MOReg)) + // Moving KillMI can clobber the physical register if the def has + // not been seen. + return false; + if (Kills.count(MOReg)) + // Don't want to extend other live ranges and update kills. + return false; + if (OtherMI != MI && MOReg == Reg && + !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS)))) + // We can't schedule across a use of the register in question. + return false; + } else { + OtherDefs.push_back(MOReg); + } + } + + for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) { + unsigned MOReg = OtherDefs[i]; + if (Uses.count(MOReg)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(MOReg) && + LiveDefs.count(MOReg)) + return false; + // Physical register def is seen. + Defs.erase(MOReg); + } + } + + // Move the old kill above MI, don't forget to move debug info as well. + MachineBasicBlock::iterator InsertPos = mi; + while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue()) + --InsertPos; + MachineBasicBlock::iterator From = KillMI; + MachineBasicBlock::iterator To = llvm::next(From); + while (llvm::prior(From)->isDebugValue()) + --From; + MBB->splice(InsertPos, MBB, From, To); + + nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr. 
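+  // MI will be revisited from the new position, so drop the stale
+  // DistanceMap entry and let the main loop record a fresh distance.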
+ DistanceMap.erase(DI); + + // Update live variables + if (LIS) { + LIS->handleMove(KillMI); + } else { + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } + + DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); + return true; +} + +/// tryInstructionTransform - For the case where an instruction has a single +/// pair of tied register operands, attempt some transformations that may +/// either eliminate the tied operands or improve the opportunities for +/// coalescing away the register copy. Returns true if no copy needs to be +/// inserted to untie mi's operands (either because they were untied, or +/// because mi was rescheduled, and will be visited again later). If the +/// shouldOnlyCommute flag is true, only instruction commutation is attempted. +bool TwoAddressInstructionPass:: +tryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist, bool shouldOnlyCommute) { + if (OptLevel == CodeGenOpt::None) + return false; + + MachineInstr &MI = *mi; + unsigned regA = MI.getOperand(DstIdx).getReg(); + unsigned regB = MI.getOperand(SrcIdx).getReg(); + + assert(TargetRegisterInfo::isVirtualRegister(regB) && + "cannot make instruction into two-address form"); + bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); + + if (TargetRegisterInfo::isVirtualRegister(regA)) + scanUses(regA); + + // Check if it is profitable to commute the operands. + unsigned SrcOp1, SrcOp2; + unsigned regC = 0; + unsigned regCIdx = ~0U; + bool TryCommute = false; + bool AggressiveCommute = false; + if (MI.isCommutable() && MI.getNumOperands() >= 3 && + TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) { + if (SrcIdx == SrcOp1) + regCIdx = SrcOp2; + else if (SrcIdx == SrcOp2) + regCIdx = SrcOp1; + + if (regCIdx != ~0U) { + regC = MI.getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false)) + // If C dies but B does not, swap the B and C operands. + // This makes the live ranges of A and C joinable. + TryCommute = true; + else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) { + TryCommute = true; + AggressiveCommute = true; + } + } + } + + // If it's profitable to commute, try to do so. + if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { + ++NumCommuted; + if (AggressiveCommute) + ++NumAggrCommuted; + return false; + } + + if (shouldOnlyCommute) + return false; + + // If there is one more use of regB later in the same MBB, consider + // re-schedule this MI below it. + if (rescheduleMIBelowKill(mi, nmi, regB)) { + ++NumReSchedDowns; + return true; + } + + if (MI.isConvertibleTo3Addr()) { + // This instruction is potentially convertible to a true + // three-address instruction. Check if it is profitable. + if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { + // Try to convert it. + if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) { + ++NumConvertedTo3Addr; + return true; // Done with this instruction. + } + } + } + + // If there is one more use of regB later in the same MBB, consider + // re-schedule it before this MI if it's legal. + if (rescheduleKillAboveMI(mi, nmi, regB)) { + ++NumReSchedUps; + return true; + } + + // If this is an instruction with a load folded into it, try unfolding + // the load, e.g. avoid this: + // movq %rdx, %rcx + // addq (%rax), %rcx + // in favor of this: + // movq (%rax), %rcx + // addq %rdx, %rcx + // because it's preferable to schedule a load than a register copy. 
+ if (MI.mayLoad() && !regBKilled) { + // Determine if a load can be unfolded. + unsigned LoadRegIndex; + unsigned NewOpc = + TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), + /*UnfoldLoad=*/true, + /*UnfoldStore=*/false, + &LoadRegIndex); + if (NewOpc != 0) { + const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); + if (UnfoldMCID.getNumDefs() == 1) { + // Unfold the load. + DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); + const TargetRegisterClass *RC = + TRI->getAllocatableClass( + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); + unsigned Reg = MRI->createVirtualRegister(RC); + SmallVector<MachineInstr *, 2> NewMIs; + if (!TII->unfoldMemoryOperand(*MF, &MI, Reg, + /*UnfoldLoad=*/true,/*UnfoldStore=*/false, + NewMIs)) { + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + return false; + } + assert(NewMIs.size() == 2 && + "Unfolded a load into multiple instructions!"); + // The load was previously folded, so this is the only use. + NewMIs[1]->addRegisterKilled(Reg, TRI); + + // Tentatively insert the instructions into the block so that they + // look "normal" to the transformation logic. + MBB->insert(mi, NewMIs[0]); + MBB->insert(mi, NewMIs[1]); + + DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] + << "2addr: NEW INST: " << *NewMIs[1]); + + // Transform the instruction, now that it no longer has a load. + unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); + unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); + MachineBasicBlock::iterator NewMI = NewMIs[1]; + bool TransformResult = + tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true); + (void)TransformResult; + assert(!TransformResult && + "tryInstructionTransform() should return false."); + if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) { + // Success, or at least we made an improvement. Keep the unfolded + // instructions and discard the original. + if (LV) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (MO.isReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isUse()) { + if (MO.isKill()) { + if (NewMIs[0]->killsRegister(MO.getReg())) + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]); + else { + assert(NewMIs[1]->killsRegister(MO.getReg()) && + "Kill missing after load unfold!"); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]); + } + } + } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) { + if (NewMIs[1]->registerDefIsDead(MO.getReg())) + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); + else { + assert(NewMIs[0]->registerDefIsDead(MO.getReg()) && + "Dead flag missing after load unfold!"); + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]); + } + } + } + } + LV->addVirtualRegisterKilled(Reg, NewMIs[1]); + } + + SmallVector<unsigned, 4> OrigRegs; + if (LIS) { + for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), + MOE = MI.operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg()) + OrigRegs.push_back(MOI->getReg()); + } + } + + MI.eraseFromParent(); + + // Update LiveIntervals. + if (LIS) { + MachineBasicBlock::iterator Begin(NewMIs[0]); + MachineBasicBlock::iterator End(NewMIs[1]); + LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs); + } + + mi = NewMIs[1]; + } else { + // Transforming didn't eliminate the tie and didn't lead to an + // improvement. Clean up the unfolded instructions and keep the + // original. 
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + NewMIs[0]->eraseFromParent(); + NewMIs[1]->eraseFromParent(); + } + } + } + } + + return false; +} + +// Collect tied operands of MI that need to be handled. +// Rewrite trivial cases immediately. +// Return true if any tied operands where found, including the trivial ones. +bool TwoAddressInstructionPass:: +collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { + const MCInstrDesc &MCID = MI->getDesc(); + bool AnyOps = false; + unsigned NumOps = MI->getNumOperands(); + + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx)) + continue; + AnyOps = true; + MachineOperand &SrcMO = MI->getOperand(SrcIdx); + MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned SrcReg = SrcMO.getReg(); + unsigned DstReg = DstMO.getReg(); + // Tied constraint already satisfied? + if (SrcReg == DstReg) + continue; + + assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); + + // Deal with <undef> uses immediately - simply rewrite the src operand. + if (SrcMO.isUndef()) { + // Constrain the DstReg register class if required. + if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, + TRI, *MF)) + MRI->constrainRegClass(DstReg, RC); + SrcMO.setReg(DstReg); + DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); + continue; + } + TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx)); + } + return AnyOps; +} + +// Process a list of tied MI operands that all use the same source register. +// The tied pairs are of the form (SrcIdx, DstIdx). +void +TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, + TiedPairList &TiedPairs, + unsigned &Dist) { + bool IsEarlyClobber = false; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second); + IsEarlyClobber |= DstMO.isEarlyClobber(); + } + + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + SlotIndex LastCopyIdx; + unsigned RegB = 0; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + + const MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned RegA = DstMO.getReg(); + + // Grab RegB from the instruction because it may have changed if the + // instruction was commuted. + RegB = MI->getOperand(SrcIdx).getReg(); + + if (RegA == RegB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = RegA; + + assert(TargetRegisterInfo::isVirtualRegister(RegB) && + "cannot make instruction into two-address form"); + +#ifndef NDEBUG + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. + for (unsigned i = 0; i != MI->getNumOperands(); ++i) + assert(i == DstIdx || + !MI->getOperand(i).isReg() || + MI->getOperand(i).getReg() != RegA); +#endif + + // Emit a copy. + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA).addReg(RegB); + + // Update DistanceMap. 
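+    // Both the freshly inserted COPY (PrevMI below) and MI itself get
+    // entries; MI's distance grows by one because an instruction now
+    // precedes it.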
+ MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + DistanceMap.insert(std::make_pair(PrevMI, Dist)); + DistanceMap[MI] = ++Dist; + + if (LIS) { + LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot(); + + if (TargetRegisterInfo::isVirtualRegister(RegA)) { + LiveInterval &LI = LIS->getInterval(RegA); + VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); + SlotIndex endIdx = + LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber); + LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI)); + } + } + + DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); + + MachineOperand &MO = MI->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + + // Make sure regA is a legal regclass for the SrcIdx operand. + if (TargetRegisterInfo::isVirtualRegister(RegA) && + TargetRegisterInfo::isVirtualRegister(RegB)) + MRI->constrainRegClass(RegA, MRI->getRegClass(RegB)); + + MO.setReg(RegA); + + // Propagate SrcRegMap. + SrcRegMap[RegA] = RegB; + } + + + if (AllUsesCopied) { + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } + } + } + + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) { + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + LV->addVirtualRegisterKilled(RegB, PrevMI); + } + + // Update LiveIntervals. + if (LIS) { + LiveInterval &LI = LIS->getInterval(RegB); + SlotIndex MIIdx = LIS->getInstructionIndex(MI); + LiveInterval::const_iterator I = LI.find(MIIdx); + assert(I != LI.end() && "RegB must be live-in to use."); + + SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber); + if (I->end == UseIdx) + LI.removeRange(LastCopyIdx, UseIdx); + } + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } +} + +/// runOnMachineFunction - Reduce two-address instructions to two operands. +/// +bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + const TargetMachine &TM = MF->getTarget(); + MRI = &MF->getRegInfo(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + InstrItins = TM.getInstrItineraryData(); + LV = getAnalysisIfAvailable<LiveVariables>(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); + AA = &getAnalysis<AliasAnalysis>(); + OptLevel = TM.getOptLevel(); + + bool MadeChange = false; + + DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); + DEBUG(dbgs() << "********** Function: " + << MF->getName() << '\n'); + + // This pass takes the function out of SSA form. 
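+  // From here on a virtual register may have multiple definitions (the
+  // "A = B; A op= C" rewrite defines A twice), so downstream passes must
+  // not rely on SSA invariants.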
+ MRI->leaveSSA(); + + TiedOperandMap TiedOperands; + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MBB = MBBI; + unsigned Dist = 0; + DistanceMap.clear(); + SrcRegMap.clear(); + DstRegMap.clear(); + Processed.clear(); + for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); + mi != me; ) { + MachineBasicBlock::iterator nmi = llvm::next(mi); + if (mi->isDebugValue()) { + mi = nmi; + continue; + } + + // Expand REG_SEQUENCE instructions. This will position mi at the first + // expanded instruction. + if (mi->isRegSequence()) + eliminateRegSequence(mi); + + DistanceMap.insert(std::make_pair(mi, ++Dist)); + + processCopy(&*mi); + + // First scan through all the tied register uses in this instruction + // and record a list of pairs of tied operands for each register. + if (!collectTiedOperands(mi, TiedOperands)) { + mi = nmi; + continue; + } + + ++NumTwoAddressInstrs; + MadeChange = true; + DEBUG(dbgs() << '\t' << *mi); + + // If the instruction has a single pair of tied operands, try some + // transformations that may either eliminate the tied operands or + // improve the opportunities for coalescing away the register copy. + if (TiedOperands.size() == 1) { + SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs + = TiedOperands.begin()->second; + if (TiedPairs.size() == 1) { + unsigned SrcIdx = TiedPairs[0].first; + unsigned DstIdx = TiedPairs[0].second; + unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); + unsigned DstReg = mi->getOperand(DstIdx).getReg(); + if (SrcReg != DstReg && + tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) { + // The tied operands have been eliminated or shifted further down the + // block to ease elimination. Continue processing with 'nmi'. + TiedOperands.clear(); + mi = nmi; + continue; + } + } + } + + // Now iterate over the information collected above. + for (TiedOperandMap::iterator OI = TiedOperands.begin(), + OE = TiedOperands.end(); OI != OE; ++OI) { + processTiedPairs(mi, OI->second, Dist); + DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); + } + + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. + if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef()); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } + + // Clear TiedOperands here instead of at the top of the loop + // since most instructions do not have tied operands. + TiedOperands.clear(); + mi = nmi; + } + } + + if (LIS) + MF->verify(this, "After two-address instruction pass"); + + return MadeChange; +} + +/// Eliminate a REG_SEQUENCE instruction as part of the de-ssa process. 
+///
+/// The instruction is turned into a sequence of sub-register copies:
+///
+///   %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1
+///
+/// Becomes:
+///
+///   %dst:ssub0<def,undef> = COPY %v1
+///   %dst:ssub1<def> = COPY %v2
+///
+void TwoAddressInstructionPass::
+eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
+  MachineInstr *MI = MBBI;
+  unsigned DstReg = MI->getOperand(0).getReg();
+  if (MI->getOperand(0).getSubReg() ||
+      TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+      !(MI->getNumOperands() & 1)) {
+    DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+    llvm_unreachable(0);
+  }
+
+  SmallVector<unsigned, 4> OrigRegs;
+  if (LIS) {
+    OrigRegs.push_back(MI->getOperand(0).getReg());
+    for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2)
+      OrigRegs.push_back(MI->getOperand(i).getReg());
+  }
+
+  bool DefEmitted = false;
+  for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+    MachineOperand &UseMO = MI->getOperand(i);
+    unsigned SrcReg = UseMO.getReg();
+    unsigned SubIdx = MI->getOperand(i+1).getImm();
+    // Nothing needs to be inserted for <undef> operands.
+    if (UseMO.isUndef())
+      continue;
+
+    // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+    // might insert a COPY that uses SrcReg after it was killed.
+    bool isKill = UseMO.isKill();
+    if (isKill)
+      for (unsigned j = i + 2; j < e; j += 2)
+        if (MI->getOperand(j).getReg() == SrcReg) {
+          MI->getOperand(j).setIsKill();
+          UseMO.setIsKill(false);
+          isKill = false;
+          break;
+        }
+
+    // Insert the sub-register copy.
+    MachineInstr *CopyMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                                   TII->get(TargetOpcode::COPY))
+      .addReg(DstReg, RegState::Define, SubIdx)
+      .addOperand(UseMO);
+
+    // The first def needs an <undef> flag because there is no live register
+    // before it.
+    if (!DefEmitted) {
+      CopyMI->getOperand(0).setIsUndef(true);
+      // Return an iterator pointing to the first inserted instr.
+      MBBI = CopyMI;
+    }
+    DefEmitted = true;
+
+    // Update LiveVariables' kill info.
+    if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
+      LV->replaceKillInstruction(SrcReg, MI, CopyMI);
+
+    DEBUG(dbgs() << "Inserted: " << *CopyMI);
+  }
+
+  MachineBasicBlock::iterator EndMBBI =
+      llvm::next(MachineBasicBlock::iterator(MI));
+
+  if (!DefEmitted) {
+    DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+    MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+    for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+      MI->RemoveOperand(j);
+  } else {
+    DEBUG(dbgs() << "Eliminated: " << *MI);
+    MI->eraseFromParent();
+  }
+
+  // Update LiveIntervals.
+  if (LIS)
+    LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs);
+}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 0000000..a95ebcd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,215 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
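+//
+// For example, in a function whose CFG is
+//
+//   entry -> bb1 -> exit        bb.dead (no predecessors)
+//
+// the traversal visits entry, bb1 and exit but never reaches bb.dead, so
+// bb.dead is deleted.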
+// +// Note that this pass is really a hack. In particular, the instruction +// selectors for various targets should just not generate code for unreachable +// blocks. Until LLVM has a more systematic way of defining instruction +// selectors, however, we cannot really expect them to handle additional +// complexity. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +namespace { + class UnreachableBlockElim : public FunctionPass { + virtual bool runOnFunction(Function &F); + public: + static char ID; // Pass identification, replacement for typeid + UnreachableBlockElim() : FunctionPass(ID) { + initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + AU.addPreserved<ProfileInfo>(); + } + }; +} +char UnreachableBlockElim::ID = 0; +INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim", + "Remove unreachable blocks from the CFG", false, false) + +FunctionPass *llvm::createUnreachableBlockEliminationPass() { + return new UnreachableBlockElim(); +} + +bool UnreachableBlockElim::runOnFunction(Function &F) { + SmallPtrSet<BasicBlock*, 8> Reachable; + + // Mark all reachable blocks. + for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I = + df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I) + /* Mark all reachable blocks */; + + // Loop over all dead blocks, remembering them and deleting all instructions + // in them. + std::vector<BasicBlock*> DeadBlocks; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + if (!Reachable.count(I)) { + BasicBlock *BB = I; + DeadBlocks.push_back(BB); + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); + BB->getInstList().pop_front(); + } + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); + } + + // Actually remove the blocks now. 
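+  // Deletion happens as a second phase: the loop above already rewrote PHI
+  // uses and detached successor edges, so erasing the blocks here cannot
+  // leave dangling references behind.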
+ ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>(); + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { + if (PI) PI->removeBlock(DeadBlocks[i]); + DeadBlocks[i]->eraseFromParent(); + } + + return DeadBlocks.size(); +} + + +namespace { + class UnreachableMachineBlockElim : public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + MachineModuleInfo *MMI; + public: + static char ID; // Pass identification, replacement for typeid + UnreachableMachineBlockElim() : MachineFunctionPass(ID) {} + }; +} +char UnreachableMachineBlockElim::ID = 0; + +INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination", + "Remove unreachable machine basic blocks", false, false) + +char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID; + +void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { + SmallPtrSet<MachineBasicBlock*, 8> Reachable; + bool ModifiedPHI = false; + + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + + // Mark all reachable blocks. + for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> > + I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); + I != E; ++I) + /* Mark all reachable blocks */; + + // Loop over all dead blocks, remembering them and deleting all instructions + // in them. + std::vector<MachineBasicBlock*> DeadBlocks; + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + + // Test for deadness. + if (!Reachable.count(BB)) { + DeadBlocks.push_back(BB); + + // Update dominator and loop info. + if (MLI) MLI->removeBlock(BB); + if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB); + + while (BB->succ_begin() != BB->succ_end()) { + MachineBasicBlock* succ = *BB->succ_begin(); + + MachineBasicBlock::iterator start = succ->begin(); + while (start != succ->end() && start->isPHI()) { + for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2) + if (start->getOperand(i).isMBB() && + start->getOperand(i).getMBB() == BB) { + start->RemoveOperand(i); + start->RemoveOperand(i-1); + } + + start++; + } + + BB->removeSuccessor(BB->succ_begin()); + } + } + } + + // Actually remove the blocks now. + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + DeadBlocks[i]->eraseFromParent(); + + // Cleanup PHI nodes. + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + // Prune unneeded PHI entries. 
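+    // For example, when a predecessor of BB was deleted as unreachable, a
+    // PHI such as
+    //   %r = PHI %a, <%bb.1>, %b, <%bb.dead>
+    // loses the (%b, <%bb.dead>) pair here; if only a single entry remains,
+    // the PHI is erased below and its input and output registers are merged.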
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(), + BB->pred_end()); + MachineBasicBlock::iterator phi = BB->begin(); + while (phi != BB->end() && phi->isPHI()) { + for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2) + if (!preds.count(phi->getOperand(i).getMBB())) { + phi->RemoveOperand(i); + phi->RemoveOperand(i-1); + ModifiedPHI = true; + } + + if (phi->getNumOperands() == 3) { + unsigned Input = phi->getOperand(1).getReg(); + unsigned Output = phi->getOperand(0).getReg(); + + MachineInstr* temp = phi; + ++phi; + temp->eraseFromParent(); + ModifiedPHI = true; + + if (Input != Output) { + MachineRegisterInfo &MRI = F.getRegInfo(); + MRI.constrainRegClass(Input, MRI.getRegClass(Output)); + MRI.replaceRegWith(Output, Input); + } + + continue; + } + + ++phi; + } + } + + F.RenumberBlocks(); + + return (DeadBlocks.size() || ModifiedPHI); +} diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp new file mode 100644 index 0000000..cd012d2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -0,0 +1,359 @@ +//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the VirtRegMap class. +// +// It also contains implementations of the Spiller interface, which, given a +// virtual register map and a machine function, eliminates all virtual +// references by replacing them with physical register references - adding spill +// code as necessary. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/VirtRegMap.h" +#include "LiveDebugVariables.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumSpillSlots, "Number of spill slots allocated"); +STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting"); + +//===----------------------------------------------------------------------===// +// VirtRegMap implementation +//===----------------------------------------------------------------------===// + +char VirtRegMap::ID = 0; + +INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false) + +bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { + MRI = &mf.getRegInfo(); + TII = mf.getTarget().getInstrInfo(); + TRI = mf.getTarget().getRegisterInfo(); + MF = &mf; + + Virt2PhysMap.clear(); + Virt2StackSlotMap.clear(); + Virt2SplitMap.clear(); + + grow(); + return false; +} + +void VirtRegMap::grow() { + unsigned NumRegs = MF->getRegInfo().getNumVirtRegs(); + Virt2PhysMap.resize(NumRegs); + Virt2StackSlotMap.resize(NumRegs); + Virt2SplitMap.resize(NumRegs); +} + +unsigned 
VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+  int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+                                                      RC->getAlignment());
+  ++NumSpillSlots;
+  return SS;
+}
+
+bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
+  unsigned Hint = MRI->getSimpleHint(VirtReg);
+  if (!Hint)
+    return false;
+  if (TargetRegisterInfo::isVirtualRegister(Hint))
+    Hint = getPhys(Hint);
+  return getPhys(VirtReg) == Hint;
+}
+
+bool VirtRegMap::hasKnownPreference(unsigned VirtReg) {
+  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
+  if (TargetRegisterInfo::isPhysicalRegister(Hint.second))
+    return true;
+  if (TargetRegisterInfo::isVirtualRegister(Hint.second))
+    return hasPhys(Hint.second);
+  return false;
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+  assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+  assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+         "attempt to assign stack slot to already spilled register");
+  const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+  return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
+  assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+  assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+         "attempt to assign stack slot to already spilled register");
+  assert((SS >= 0 ||
+          (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+         "illegal fixed frame index");
+  Virt2StackSlotMap[virtReg] = SS;
+}
+
+void VirtRegMap::print(raw_ostream &OS, const Module*) const {
+  OS << "********** REGISTER MAP **********\n";
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+      OS << '[' << PrintReg(Reg, TRI) << " -> "
+         << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+         << MRI->getRegClass(Reg)->getName() << "\n";
+    }
+  }
+
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+      OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+         << "] " << MRI->getRegClass(Reg)->getName() << "\n";
+    }
+  }
+  OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VirtRegMap::dump() const {
+  print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+//                              VirtRegRewriter
+//===----------------------------------------------------------------------===//
+//
+// The VirtRegRewriter is the last of the register allocator passes.
+// It rewrites virtual registers to physical registers as specified in the
+// VirtRegMap analysis. It also updates live-in information on basic blocks
+// according to LiveIntervals.
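+// For example, an operand written as %vreg42:sub_32 is rewritten to the
+// matching sub-register of the physical register assigned to %vreg42, with
+// kill and dead flags moved to implicit super-register operands as needed.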
+// +namespace { +class VirtRegRewriter : public MachineFunctionPass { + MachineFunction *MF; + const TargetMachine *TM; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + SlotIndexes *Indexes; + LiveIntervals *LIS; + VirtRegMap *VRM; + + void rewrite(); + void addMBBLiveIns(); +public: + static char ID; + VirtRegRewriter() : MachineFunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual bool runOnMachineFunction(MachineFunction&); +}; +} // end anonymous namespace + +char &llvm::VirtRegRewriterID = VirtRegRewriter::ID; + +INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", + "Virtual Register Rewriter", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", + "Virtual Register Rewriter", false, false) + +char VirtRegRewriter::ID = 0; + +void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<LiveIntervals>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveDebugVariables>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); + AU.addRequired<VirtRegMap>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { + MF = &fn; + TM = &MF->getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + MRI = &MF->getRegInfo(); + Indexes = &getAnalysis<SlotIndexes>(); + LIS = &getAnalysis<LiveIntervals>(); + VRM = &getAnalysis<VirtRegMap>(); + DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" + << "********** Function: " + << MF->getName() << '\n'); + DEBUG(VRM->dump()); + + // Add kill flags while we still have virtual registers. + LIS->addKillFlags(VRM); + + // Live-in lists on basic blocks are required for physregs. + addMBBLiveIns(); + + // Rewrite virtual registers. + rewrite(); + + // Write out new DBG_VALUE instructions. + getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); + + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers and release all the transient data. + VRM->clearAllVirt(); + MRI->clearVirtRegs(); + return true; +} + +// Compute MBB live-in lists from virtual register live ranges and their +// assignments. +void VirtRegRewriter::addMBBLiveIns() { + SmallVector<MachineBasicBlock*, 16> LiveIn; + for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { + unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx); + if (MRI->reg_nodbg_empty(VirtReg)) + continue; + LiveInterval &LI = LIS->getInterval(VirtReg); + if (LI.empty() || LIS->intervalIsInOneMBB(LI)) + continue; + // This is a virtual register that is live across basic blocks. Its + // assigned PhysReg must be marked as live-in to those blocks. + unsigned PhysReg = VRM->getPhys(VirtReg); + assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); + + // Scan the segments of LI. 
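+    // Each segment [start, end) is handed to SlotIndexes, which reports the
+    // basic blocks the segment extends into; PhysReg is added to the
+    // live-in list of each of those blocks.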
+    for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I != E;
+         ++I) {
+      if (!Indexes->findLiveInMBBs(I->start, I->end, LiveIn))
+        continue;
+      for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
+        if (!LiveIn[i]->isLiveIn(PhysReg))
+          LiveIn[i]->addLiveIn(PhysReg);
+      LiveIn.clear();
+    }
+  }
+}
+
+void VirtRegRewriter::rewrite() {
+  SmallVector<unsigned, 8> SuperDeads;
+  SmallVector<unsigned, 8> SuperDefs;
+  SmallVector<unsigned, 8> SuperKills;
+
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    DEBUG(MBBI->print(dbgs(), Indexes));
+    for (MachineBasicBlock::instr_iterator
+           MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
+      MachineInstr *MI = MII;
+      ++MII;
+
+      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+           MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+        MachineOperand &MO = *MOI;
+
+        // Make sure MRI knows about registers clobbered by regmasks.
+        if (MO.isRegMask())
+          MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
+        if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+          continue;
+        unsigned VirtReg = MO.getReg();
+        unsigned PhysReg = VRM->getPhys(VirtReg);
+        assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
+               "Instruction uses unmapped VirtReg");
+        assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
+
+        // Preserve semantics of sub-register operands.
+        if (MO.getSubReg()) {
+          // A virtual register kill refers to the whole register, so we may
+          // have to add <imp-use,kill> operands for the super-register. A
+          // partial redef always kills and redefines the super-register.
+          if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+            SuperKills.push_back(PhysReg);
+
+          if (MO.isDef()) {
+            // The <def,undef> flag only makes sense for sub-register defs, and
+            // we are substituting a full physreg. An <imp-use,kill> operand
+            // from the SuperKills list will represent the partial read of the
+            // super-register.
+            MO.setIsUndef(false);
+
+            // Also add implicit defs for the super-register.
+            if (MO.isDead())
+              SuperDeads.push_back(PhysReg);
+            else
+              SuperDefs.push_back(PhysReg);
+          }
+
+          // PhysReg operands cannot have subregister indexes.
+          PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
+          assert(PhysReg && "Invalid SubReg for physical register");
+          MO.setSubReg(0);
+        }
+        // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+        // we need the inlining here.
+        MO.setReg(PhysReg);
+      }
+
+      // Add any missing super-register kills after rewriting the whole
+      // instruction.
+      while (!SuperKills.empty())
+        MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+      while (!SuperDeads.empty())
+        MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+
+      while (!SuperDefs.empty())
+        MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+
+      DEBUG(dbgs() << "> " << *MI);
+
+      // Finally, remove any identity copies.
+      if (MI->isIdentityCopy()) {
+        ++NumIdCopies;
+        if (MI->getNumOperands() == 2) {
+          DEBUG(dbgs() << "Deleting identity copy.\n");
+          if (Indexes)
+            Indexes->removeMachineInstrFromMaps(MI);
+          // It's safe to erase MI because MII has already been incremented.
+          MI->eraseFromParent();
+        } else {
+          // Transform identity copy to a KILL to deal with subregisters.
+          MI->setDesc(TII->get(TargetOpcode::KILL));
+          DEBUG(dbgs() << "Identity copy: " << *MI);
+        }
+      }
+    }
+  }
+
+  // Tell MRI about physical registers in use.
+  for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+    if (!MRI->reg_nodbg_empty(Reg))
+      MRI->setPhysRegUsed(Reg);
+}